diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 202 |
1 files changed, 187 insertions, 15 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fd296988e..c450a1d4f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,19 +17,28 @@ package com.android.inputmethod.latin; import android.text.TextUtils; +import android.util.Log; import android.util.SparseArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.settings.NativeSuggestOptions; import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.JniUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.StringUtils; import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -57,10 +66,27 @@ public final class BinaryDictionary extends Dictionary { @UsedForTesting public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; + public static final int NOT_A_VALID_TIMESTAMP = -1; + + // Format to get unigram flags from native side via getWordPropertyNative(). + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4; + private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; + private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; + private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; + private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + + // Format to get probability and historical info from native side via getWordPropertyNative(). + public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; + public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; + public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; + public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; + public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; + private long mNativeDict; private final Locale mLocale; private final long mDictSize; private final String mDictFilePath; + private final boolean mIsUpdatable; private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS]; private final int[] mSpaceIndices = new int[MAX_RESULTS]; @@ -107,6 +133,7 @@ public final class BinaryDictionary extends Dictionary { mLocale = locale; mDictSize = length; mDictFilePath = filename; + mIsUpdatable = isUpdatable; mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); loadDictionary(filename, offset, length, isUpdatable); } @@ -116,15 +143,24 @@ public final class BinaryDictionary extends Dictionary { } private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, - String[] attributeKeyStringArray, String[] attributeValueStringArray); + String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); + private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, + int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, + ArrayList<int[]> outAttributeValues); private static native void flushNative(long dict, String filePath); private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); private static native void flushWithGCNative(long dict, String filePath); private static native void closeNative(long dict); + private static native int getFormatVersionNative(long dict); private static native int getProbabilityNative(long dict, int[] word); private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); + private static native void getWordPropertyNative(long dict, int[] word, + int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, + ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, + ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints); private static native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, @@ -133,17 +169,22 @@ public final class BinaryDictionary extends Dictionary { int[] outputAutoCommitFirstWordConfidence); private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); private static native int editDistanceNative(int[] before, int[] after); - private static native void addUnigramWordNative(long dict, int[] word, int probability); + private static native void addUnigramWordNative(long dict, int[] word, int probability, + int[] shortcutTarget, int shortcutProbability, boolean isNotAWord, + boolean isBlacklisted, int timestamp); private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, - int probability); + int probability, int timestamp); private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); + private static native int addMultipleDictionaryEntriesNative(long dict, + LanguageModelParam[] languageModelParams, int startIndex); private static native int calculateProbabilityNative(long dict, int unigramProbability, int bigramProbability); + private static native int setCurrentTimeForTestNative(int currentTime); private static native String getPropertyNative(long dict, String query); + private static native boolean isCorruptedNative(long dict); - @UsedForTesting public static boolean createEmptyDictFile(final String filePath, final long dictVersion, - final Map<String, String> attributeMap) { + final Locale locale, final Map<String, String> attributeMap) { final String[] keyArray = new String[attributeMap.size()]; final String[] valueArray = new String[attributeMap.size()]; int index = 0; @@ -152,7 +193,8 @@ public final class BinaryDictionary extends Dictionary { valueArray[index] = attributeMap.get(key); index++; } - return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); + return createEmptyDictFileNative(filePath, dictVersion, locale.toString(), keyArray, + valueArray); } // TODO: Move native dict into session @@ -161,6 +203,48 @@ public final class BinaryDictionary extends Dictionary { mNativeDict = openNative(path, startOffset, length, isUpdatable); } + // TODO: Check isCorrupted() for main dictionaries. + public boolean isCorrupted() { + if (!isValidDictionary()) { + return false; + } + if (!isCorruptedNative(mNativeDict)) { + return false; + } + // TODO: Record the corruption. + Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); + Log.e(TAG, "locale: " + mLocale); + Log.e(TAG, "dict size: " + mDictSize); + Log.e(TAG, "updatable: " + mIsUpdatable); + return true; + } + + @UsedForTesting + public DictionaryHeader getHeader() throws UnsupportedFormatException { + if (mNativeDict == 0) { + return null; + } + final int[] outHeaderSize = new int[1]; + final int[] outFormatVersion = new int[1]; + final ArrayList<int[]> outAttributeKeys = CollectionUtils.newArrayList(); + final ArrayList<int[]> outAttributeValues = CollectionUtils.newArrayList(); + getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, + outAttributeValues); + final HashMap<String, String> attributes = new HashMap<String, String>(); + for (int i = 0; i < outAttributeKeys.size(); i++) { + final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeKeys.get(i)); + final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeValues.get(i)); + attributes.put(attributeKey, attributeValue); + } + final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); + return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), + new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); + } + + @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, final String prevWord, final ProximityInfo proximityInfo, @@ -235,6 +319,10 @@ public final class BinaryDictionary extends Dictionary { return mNativeDict != 0; } + public int getFormatVersion() { + return getFormatVersionNative(mNativeDict); + } + public static float calcNormalizedScore(final String before, final String after, final int score) { return calcNormalizedScoreNative(StringUtils.toCodePointArray(before), @@ -274,23 +362,75 @@ public final class BinaryDictionary extends Dictionary { return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); } - // Add a unigram entry to binary dictionary in native code. - public void addUnigramWord(final String word, final int probability) { + public WordProperty getWordProperty(final String word) { + if (TextUtils.isEmpty(word)) { + return null; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] outCodePoints = new int[MAX_WORD_LENGTH]; + final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; + final int[] outProbabilityInfo = + new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; + final ArrayList<int[]> outBigramTargets = CollectionUtils.newArrayList(); + final ArrayList<int[]> outBigramProbabilityInfo = CollectionUtils.newArrayList(); + final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList(); + final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList(); + getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + return new WordProperty(codePoints, + outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + } + + public static class GetNextWordPropertyResult { + public WordProperty mWordProperty; + public int mNextToken; + + public GetNextWordPropertyResult(final WordProperty wordPreperty, final int nextToken) { + mWordProperty = wordPreperty; + mNextToken = nextToken; + } + } + + /** + * Method to iterate all words in the dictionary for makedict. + * If token is 0, this method newly starts iterating the dictionary. + */ + public GetNextWordPropertyResult getNextWordProperty(final int token) { + final int[] codePoints = new int[MAX_WORD_LENGTH]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints); + final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); + return new GetNextWordPropertyResult(getWordProperty(word), nextToken); + } + + // Add a unigram entry to binary dictionary with unigram attributes in native code. + public void addUnigramWord(final String word, final int probability, + final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, + final boolean isBlacklisted, final int timestamp) { if (TextUtils.isEmpty(word)) { return; } final int[] codePoints = StringUtils.toCodePointArray(word); - addUnigramWordNative(mNativeDict, codePoints, probability); + final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? + StringUtils.toCodePointArray(shortcutTarget) : null; + addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, + shortcutProbability, isNotAWord, isBlacklisted, timestamp); } - // Add a bigram entry to binary dictionary in native code. - public void addBigramWords(final String word0, final String word1, final int probability) { + // Add a bigram entry to binary dictionary with timestamp in native code. + public void addBigramWords(final String word0, final String word1, final int probability, + final int timestamp) { if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { return; } final int[] codePoints0 = StringUtils.toCodePointArray(word0); final int[] codePoints1 = StringUtils.toCodePointArray(word1); - addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability); + addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp); } // Remove a bigram entry form binary dictionary in native code. @@ -303,11 +443,29 @@ public final class BinaryDictionary extends Dictionary { removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); } + public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { + if (!isValidDictionary()) return; + int processedParamCount = 0; + while (processedParamCount < languageModelParams.length) { + if (needsToRunGC(true /* mindsBlockByGC */)) { + flushWithGC(); + } + processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, + languageModelParams, processedParamCount); + if (processedParamCount <= 0) { + return; + } + } + } + private void reopen() { close(); final File dictFile = new File(mDictFilePath); - mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, - dictFile.length(), true /* isUpdatable */); + // WARNING: Because we pass 0 as the offset and file.length() as the length, this can + // only be called for actual files. Right now it's only called by the flush() family of + // functions, which require an updatable dictionary, so it's okay. But beware. + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); } public void flush() { @@ -339,8 +497,22 @@ public final class BinaryDictionary extends Dictionary { return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability); } + /** + * Control the current time to be used in the native code. If currentTime >= 0, this method sets + * the current time and gets into test mode. + * In test mode, set timestamp is used as the current time in the native code. + * If currentTime < 0, quit the test mode and returns to using time() to get the current time. + * + * @param currentTime seconds since the unix epoch + * @return current time got in the native code. + */ + @UsedForTesting + public static int setCurrentTimeForTest(final int currentTime) { + return setCurrentTimeForTestNative(currentTime); + } + @UsedForTesting - public String getPropertyForTests(String query) { + public String getPropertyForTest(final String query) { if (!isValidDictionary()) return ""; return getPropertyNative(mNativeDict, query); } |