aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/BinaryDictionary.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java 202
1 files changed, 187 insertions, 15 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index fd296988e..c450a1d4f 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -17,19 +17,28 @@
package com.android.inputmethod.latin;
import android.text.TextUtils;
+import android.util.Log;
import android.util.SparseArray;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
+import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.JniUtils;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -57,10 +66,27 @@ public final class BinaryDictionary extends Dictionary {
@UsedForTesting
public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+ public static final int NOT_A_VALID_TIMESTAMP = -1;
+
+ // Format to get unigram flags from native side via getWordPropertyNative().
+ private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4;
+ private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
+ private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
+ private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
+ private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
+
+ // Format to get probability and historical info from native side via getWordPropertyNative().
+ public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
+ public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
+ public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
+ public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
+ public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;
+
private long mNativeDict;
private final Locale mLocale;
private final long mDictSize;
private final String mDictFilePath;
+ private final boolean mIsUpdatable;
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
private final int[] mSpaceIndices = new int[MAX_RESULTS];
@@ -107,6 +133,7 @@ public final class BinaryDictionary extends Dictionary {
mLocale = locale;
mDictSize = length;
mDictFilePath = filename;
+ mIsUpdatable = isUpdatable;
mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
loadDictionary(filename, offset, length, isUpdatable);
}
@@ -116,15 +143,24 @@ public final class BinaryDictionary extends Dictionary {
}
private static native boolean createEmptyDictFileNative(String filePath, long dictVersion,
- String[] attributeKeyStringArray, String[] attributeValueStringArray);
+ String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
private static native long openNative(String sourceDir, long dictOffset, long dictSize,
boolean isUpdatable);
+ private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
+ int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
+ ArrayList<int[]> outAttributeValues);
private static native void flushNative(long dict, String filePath);
private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
private static native void flushWithGCNative(long dict, String filePath);
private static native void closeNative(long dict);
+ private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+ private static native void getWordPropertyNative(long dict, int[] word,
+ int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
+ ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
+ ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
+ private static native int getNextWordNative(long dict, int token, int[] outCodePoints);
private static native int getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -133,17 +169,22 @@ public final class BinaryDictionary extends Dictionary {
int[] outputAutoCommitFirstWordConfidence);
private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
private static native int editDistanceNative(int[] before, int[] after);
- private static native void addUnigramWordNative(long dict, int[] word, int probability);
+ private static native void addUnigramWordNative(long dict, int[] word, int probability,
+ int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
+ boolean isBlacklisted, int timestamp);
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
- int probability);
+ int probability, int timestamp);
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
+ private static native int addMultipleDictionaryEntriesNative(long dict,
+ LanguageModelParam[] languageModelParams, int startIndex);
private static native int calculateProbabilityNative(long dict, int unigramProbability,
int bigramProbability);
+ private static native int setCurrentTimeForTestNative(int currentTime);
private static native String getPropertyNative(long dict, String query);
+ private static native boolean isCorruptedNative(long dict);
- @UsedForTesting
public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
- final Map<String, String> attributeMap) {
+ final Locale locale, final Map<String, String> attributeMap) {
final String[] keyArray = new String[attributeMap.size()];
final String[] valueArray = new String[attributeMap.size()];
int index = 0;
@@ -152,7 +193,8 @@ public final class BinaryDictionary extends Dictionary {
valueArray[index] = attributeMap.get(key);
index++;
}
- return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray);
+ return createEmptyDictFileNative(filePath, dictVersion, locale.toString(), keyArray,
+ valueArray);
}
// TODO: Move native dict into session
@@ -161,6 +203,48 @@ public final class BinaryDictionary extends Dictionary {
mNativeDict = openNative(path, startOffset, length, isUpdatable);
}
+ // TODO: Check isCorrupted() for main dictionaries.
+ public boolean isCorrupted() {
+ if (!isValidDictionary()) {
+ return false;
+ }
+ if (!isCorruptedNative(mNativeDict)) {
+ return false;
+ }
+ // TODO: Record the corruption.
+ Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
+ Log.e(TAG, "locale: " + mLocale);
+ Log.e(TAG, "dict size: " + mDictSize);
+ Log.e(TAG, "updatable: " + mIsUpdatable);
+ return true;
+ }
+
+ @UsedForTesting
+ public DictionaryHeader getHeader() throws UnsupportedFormatException {
+ if (mNativeDict == 0) {
+ return null;
+ }
+ final int[] outHeaderSize = new int[1];
+ final int[] outFormatVersion = new int[1];
+ final ArrayList<int[]> outAttributeKeys = CollectionUtils.newArrayList();
+ final ArrayList<int[]> outAttributeValues = CollectionUtils.newArrayList();
+ getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
+ outAttributeValues);
+ final HashMap<String, String> attributes = new HashMap<String, String>();
+ for (int i = 0; i < outAttributeKeys.size(); i++) {
+ final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
+ outAttributeKeys.get(i));
+ final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
+ outAttributeValues.get(i));
+ attributes.put(attributeKey, attributeValue);
+ }
+ final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
+ attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
+ return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
+ new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
+ }
+
+
@Override
public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
final String prevWord, final ProximityInfo proximityInfo,
@@ -235,6 +319,10 @@ public final class BinaryDictionary extends Dictionary {
return mNativeDict != 0;
}
+ public int getFormatVersion() {
+ return getFormatVersionNative(mNativeDict);
+ }
+
public static float calcNormalizedScore(final String before, final String after,
final int score) {
return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
@@ -274,23 +362,75 @@ public final class BinaryDictionary extends Dictionary {
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
}
- // Add a unigram entry to binary dictionary in native code.
- public void addUnigramWord(final String word, final int probability) {
+ public WordProperty getWordProperty(final String word) {
+ if (TextUtils.isEmpty(word)) {
+ return null;
+ }
+ final int[] codePoints = StringUtils.toCodePointArray(word);
+ final int[] outCodePoints = new int[MAX_WORD_LENGTH];
+ final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
+ final int[] outProbabilityInfo =
+ new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
+ final ArrayList<int[]> outBigramTargets = CollectionUtils.newArrayList();
+ final ArrayList<int[]> outBigramProbabilityInfo = CollectionUtils.newArrayList();
+ final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
+ final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
+ getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
+ return new WordProperty(codePoints,
+ outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
+ outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
+ outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
+ outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
+ }
+
+ /**
+ * Result holder for getNextWordProperty(): the property looked up for the word that was
+ * read, together with the token returned by the native iteration call.
+ */
+ public static class GetNextWordPropertyResult {
+ public WordProperty mWordProperty;
+ public int mNextToken;
+
+ public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
+ mWordProperty = wordProperty;
+ mNextToken = nextToken;
+ }
+ }
+
+ /**
+ * Method to iterate all words in the dictionary for makedict.
+ * If token is 0, this method newly starts iterating the dictionary.
+ */
+ public GetNextWordPropertyResult getNextWordProperty(final int token) {
+ final int[] codePoints = new int[MAX_WORD_LENGTH];
+ final int nextToken = getNextWordNative(mNativeDict, token, codePoints);
+ final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
+ return new GetNextWordPropertyResult(getWordProperty(word), nextToken);
+ }
+
+ // Add a unigram entry to binary dictionary with unigram attributes in native code.
+ public void addUnigramWord(final String word, final int probability,
+ final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
+ final boolean isBlacklisted, final int timestamp) {
if (TextUtils.isEmpty(word)) {
return;
}
final int[] codePoints = StringUtils.toCodePointArray(word);
- addUnigramWordNative(mNativeDict, codePoints, probability);
+ final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
+ StringUtils.toCodePointArray(shortcutTarget) : null;
+ addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
+ shortcutProbability, isNotAWord, isBlacklisted, timestamp);
}
- // Add a bigram entry to binary dictionary in native code.
- public void addBigramWords(final String word0, final String word1, final int probability) {
+ // Add a bigram entry to binary dictionary with timestamp in native code.
+ public void addBigramWords(final String word0, final String word1, final int probability,
+ final int timestamp) {
if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
return;
}
final int[] codePoints0 = StringUtils.toCodePointArray(word0);
final int[] codePoints1 = StringUtils.toCodePointArray(word1);
- addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
+ addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
}
// Remove a bigram entry from binary dictionary in native code.
@@ -303,11 +443,29 @@ public final class BinaryDictionary extends Dictionary {
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
}
+ public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
+ if (!isValidDictionary()) return;
+ int processedParamCount = 0;
+ while (processedParamCount < languageModelParams.length) {
+ if (needsToRunGC(true /* mindsBlockByGC */)) {
+ flushWithGC();
+ }
+ processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
+ languageModelParams, processedParamCount);
+ if (processedParamCount <= 0) {
+ return;
+ }
+ }
+ }
+
private void reopen() {
close();
final File dictFile = new File(mDictFilePath);
- mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */,
- dictFile.length(), true /* isUpdatable */);
+ // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
+ // only be called for actual files. Right now it's only called by the flush() family of
+ // functions, which require an updatable dictionary, so it's okay. But beware.
+ loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
+ dictFile.length(), mIsUpdatable);
}
public void flush() {
@@ -339,8 +497,22 @@ public final class BinaryDictionary extends Dictionary {
return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
}
+ /**
+ * Control the current time to be used in the native code. If currentTime >= 0, this method sets
+ * the current time and enters test mode.
+ * In test mode, the timestamp that was set is used as the current time in the native code.
+ * If currentTime < 0, quits test mode and goes back to using time() for the current time.
+ *
+ * @param currentTime seconds since the unix epoch
+ * @return the current time obtained in the native code.
+ */
+ @UsedForTesting
+ public static int setCurrentTimeForTest(final int currentTime) {
+ return setCurrentTimeForTestNative(currentTime);
+ }
+
@UsedForTesting
- public String getPropertyForTests(String query) {
+ public String getPropertyForTest(final String query) {
if (!isValidDictionary()) return "";
return getPropertyNative(mNativeDict, query);
}