1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.util.Log;
import java.io.File;
import java.util.Arrays;
/**
* Implements a static, compacted, binary dictionary of standard words.
*/
public class BinaryDictionary extends Dictionary {
/**
* There is difference between what java and native code can handle.
* This value should only be used in BinaryDictionary.java
* It is necessary to keep it at this value because some languages e.g. German have
* really long words.
*/
protected static final int MAX_WORD_LENGTH = 48;
private static final String TAG = "BinaryDictionary";
private static final int MAX_ALTERNATIVES = 16;
private static final int MAX_WORDS = 18;
private static final int MAX_BIGRAMS = 60;
private static final int TYPED_LETTER_MULTIPLIER = 2;
private static final BinaryDictionary sInstance = new BinaryDictionary();
private int mDicTypeId;
private int mNativeDict;
private long mDictLength;
private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
private final int[] mFrequencies = new int[MAX_WORDS];
private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
static {
try {
System.loadLibrary("jni_latinime");
} catch (UnsatisfiedLinkError ule) {
Log.e(TAG, "Could not load native library jni_latinime");
}
}
private BinaryDictionary() {
}
/**
* Initialize a dictionary from a raw resource file
* @param context application context for reading resources
* @param resId the resource containing the raw binary dictionary
* @return initialized instance of BinaryDictionary
*/
public static BinaryDictionary initDictionary(Context context, int resId, int dicTypeId) {
synchronized (sInstance) {
sInstance.closeInternal();
try {
final AssetFileDescriptor afd = context.getResources().openRawResourceFd(resId);
if (afd == null) {
Log.e(TAG, "Found the resource but it is compressed. resId=" + resId);
return null;
}
final String sourceDir = context.getApplicationInfo().sourceDir;
final File packagePath = new File(sourceDir);
// TODO: Come up with a way to handle a directory.
if (!packagePath.isFile()) {
Log.e(TAG, "sourceDir is not a file: " + sourceDir);
return null;
}
sInstance.loadDictionary(sourceDir, afd.getStartOffset(), afd.getLength());
sInstance.mDicTypeId = dicTypeId;
} catch (android.content.res.Resources.NotFoundException e) {
Log.e(TAG, "Could not find the resource. resId=" + resId);
return null;
}
}
return sInstance;
}
// For unit test
/* package */ static BinaryDictionary initDictionary(File dictionary, long startOffset,
long length, int dicTypeId) {
synchronized (sInstance) {
sInstance.closeInternal();
if (dictionary.isFile()) {
sInstance.loadDictionary(dictionary.getAbsolutePath(), startOffset, length);
sInstance.mDicTypeId = dicTypeId;
} else {
Log.e(TAG, "Could not find the file. path=" + dictionary.getAbsolutePath());
return null;
}
}
return sInstance;
}
private native int openNative(String sourceDir, long dictOffset, long dictSize,
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength,
int maxWords, int maxAlternatives);
private native void closeNative(int dict);
private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize,
char[] outputChars, int[] frequencies);
private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
int maxWordLength, int maxBigrams, int maxAlternatives);
private final void loadDictionary(String path, long startOffset, long length) {
mNativeDict = openNative(path, startOffset, length,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
mDictLength = length;
}
@Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
final WordCallback callback) {
if (mNativeDict == 0) return;
char[] chars = previousWord.toString().toCharArray();
Arrays.fill(mOutputChars_bigrams, (char) 0);
Arrays.fill(mFrequencies_bigrams, 0);
int codesSize = codes.size();
Arrays.fill(mInputCodes, -1);
int[] alternatives = codes.getCodesAt(0);
System.arraycopy(alternatives, 0, mInputCodes, 0,
Math.min(alternatives.length, MAX_ALTERNATIVES));
int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
mOutputChars_bigrams, mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS,
MAX_ALTERNATIVES);
for (int j = 0; j < count; ++j) {
if (mFrequencies_bigrams[j] < 1) break;
final int start = j * MAX_WORD_LENGTH;
int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) {
++len;
}
if (len > 0) {
callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j],
mDicTypeId, DataType.BIGRAM);
}
}
}
@Override
public void getWords(final WordComposer codes, final WordCallback callback) {
if (mNativeDict == 0) return;
final int codesSize = codes.size();
// Won't deal with really long words.
if (codesSize > MAX_WORD_LENGTH - 1) return;
Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE);
for (int i = 0; i < codesSize; i++) {
int[] alternatives = codes.getCodesAt(i);
System.arraycopy(alternatives, 0, mInputCodes, i * MAX_ALTERNATIVES,
Math.min(alternatives.length, MAX_ALTERNATIVES));
}
Arrays.fill(mOutputChars, (char) 0);
Arrays.fill(mFrequencies, 0);
int count = getSuggestionsNative(mNativeDict, mInputCodes, codesSize, mOutputChars,
mFrequencies);
for (int j = 0; j < count; ++j) {
if (mFrequencies[j] < 1) break;
final int start = j * MAX_WORD_LENGTH;
int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
++len;
}
if (len > 0) {
callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId,
DataType.UNIGRAM);
}
}
}
@Override
public boolean isValidWord(CharSequence word) {
if (word == null) return false;
char[] chars = word.toString().toCharArray();
return isValidWordNative(mNativeDict, chars, chars.length);
}
public long getSize() {
return mDictLength; // This value is initialized in loadDictionary()
}
@Override
public synchronized void close() {
closeInternal();
}
private void closeInternal() {
if (mNativeDict != 0) {
closeNative(mNativeDict);
mNativeDict = 0;
mDictLength = 0;
}
}
@Override
protected void finalize() throws Throwable {
try {
closeInternal();
} finally {
super.finalize();
}
}
}
|