diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 349 |
1 files changed, 281 insertions, 68 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fd296988e..b88509fde 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,19 +17,29 @@ package com.android.inputmethod.latin; import android.text.TextUtils; +import android.util.Log; import android.util.SparseArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.settings.NativeSuggestOptions; +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.JniUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.StringUtils; import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -57,17 +67,40 @@ public final class BinaryDictionary extends Dictionary { @UsedForTesting public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; + public static final int NOT_A_VALID_TIMESTAMP = -1; + + // Format to get unigram flags from native side via getWordPropertyNative(). + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4; + private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; + private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; + private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; + private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + + // Format to get probability and historical info from native side via getWordPropertyNative(). + public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; + public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; + public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; + public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; + public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; + + public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; + private long mNativeDict; private final Locale mLocale; private final long mDictSize; private final String mDictFilePath; + private final boolean mIsUpdatable; + private boolean mHasUpdated; + private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; + private final int[] mOutputSuggestionCount = new int[1]; private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS]; private final int[] mSpaceIndices = new int[MAX_RESULTS]; private final int[] mOutputScores = new int[MAX_RESULTS]; private final int[] mOutputTypes = new int[MAX_RESULTS]; // Only one result is ever used private final int[] mOutputAutoCommitFirstWordConfidence = new int[1]; + private final float[] mInputOutputLanguageWeight = new float[1]; private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions(); @@ -91,8 +124,7 @@ public final class BinaryDictionary extends Dictionary { } /** - * Constructor for the binary dictionary. This is supposed to be called from the - * dictionary factory. + * Constructs binary dictionary using existing dictionary file. * @param filename the name of the file to read through native code. * @param offset the offset of the dictionary data within the file. * @param length the length of the binary data. @@ -107,100 +139,188 @@ public final class BinaryDictionary extends Dictionary { mLocale = locale; mDictSize = length; mDictFilePath = filename; + mIsUpdatable = isUpdatable; + mHasUpdated = false; mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); loadDictionary(filename, offset, length, isUpdatable); } + /** + * Constructs binary dictionary on memory. + * @param filename the name of the file used to flush. + * @param useFullEditDistance whether to use the full edit distance in suggestions + * @param dictType the dictionary type, as a human-readable string + * @param formatVersion the format version of the dictionary + * @param attributeMap the attributes of the dictionary + */ + @UsedForTesting + public BinaryDictionary(final String filename, final boolean useFullEditDistance, + final Locale locale, final String dictType, final long formatVersion, + final Map<String, String> attributeMap) { + super(dictType); + mLocale = locale; + mDictSize = 0; + mDictFilePath = filename; + // On memory dictionary is always updatable. + mIsUpdatable = true; + mHasUpdated = false; + mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); + final String[] keyArray = new String[attributeMap.size()]; + final String[] valueArray = new String[attributeMap.size()]; + int index = 0; + for (final String key : attributeMap.keySet()) { + keyArray[index] = key; + valueArray[index] = attributeMap.get(key); + index++; + } + mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); + } + + static { JniUtils.loadNativeLibrary(); } - private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, - String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); + private static native long createOnMemoryNative(long formatVersion, + String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); + private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, + int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, + ArrayList<int[]> outAttributeValues); private static native void flushNative(long dict, String filePath); private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); private static native void flushWithGCNative(long dict, String filePath); private static native void closeNative(long dict); + private static native int getFormatVersionNative(long dict); private static native int getProbabilityNative(long dict, int[] word); private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); - private static native int getSuggestionsNative(long dict, long proximityInfo, + private static native void getWordPropertyNative(long dict, int[] word, + int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, + ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, + ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints); + private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, - int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, - int[] suggestOptions, int[] prevWordCodePointArray, - int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes, - int[] outputAutoCommitFirstWordConfidence); - private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); - private static native int editDistanceNative(int[] before, int[] after); - private static native void addUnigramWordNative(long dict, int[] word, int probability); + int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, + int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints, + int[] outputScores, int[] outputIndices, int[] outputTypes, + int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight); + private static native void addUnigramWordNative(long dict, int[] word, int probability, + int[] shortcutTarget, int shortcutProbability, boolean isNotAWord, + boolean isBlacklisted, int timestamp); private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, - int probability); + int probability, int timestamp); private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); + private static native int addMultipleDictionaryEntriesNative(long dict, + LanguageModelParam[] languageModelParams, int startIndex); private static native int calculateProbabilityNative(long dict, int unigramProbability, int bigramProbability); private static native String getPropertyNative(long dict, String query); - - @UsedForTesting - public static boolean createEmptyDictFile(final String filePath, final long dictVersion, - final Map<String, String> attributeMap) { - final String[] keyArray = new String[attributeMap.size()]; - final String[] valueArray = new String[attributeMap.size()]; - int index = 0; - for (final String key : attributeMap.keySet()) { - keyArray[index] = key; - valueArray[index] = attributeMap.get(key); - index++; - } - return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); - } + private static native boolean isCorruptedNative(long dict); // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable) { + mHasUpdated = false; mNativeDict = openNative(path, startOffset, length, isUpdatable); } + // TODO: Check isCorrupted() for main dictionaries. + public boolean isCorrupted() { + if (!isValidDictionary()) { + return false; + } + if (!isCorruptedNative(mNativeDict)) { + return false; + } + // TODO: Record the corruption. + Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); + Log.e(TAG, "locale: " + mLocale); + Log.e(TAG, "dict size: " + mDictSize); + Log.e(TAG, "updatable: " + mIsUpdatable); + return true; + } + + public DictionaryHeader getHeader() throws UnsupportedFormatException { + if (mNativeDict == 0) { + return null; + } + final int[] outHeaderSize = new int[1]; + final int[] outFormatVersion = new int[1]; + final ArrayList<int[]> outAttributeKeys = CollectionUtils.newArrayList(); + final ArrayList<int[]> outAttributeValues = CollectionUtils.newArrayList(); + getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, + outAttributeValues); + final HashMap<String, String> attributes = new HashMap<String, String>(); + for (int i = 0; i < outAttributeKeys.size(); i++) { + final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeKeys.get(i)); + final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeValues.get(i)); + attributes.put(attributeKey, attributeValue); + } + final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); + return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), + new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); + } + + @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, final String prevWord, final ProximityInfo proximityInfo, - final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { + final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, + final float[] inOutLanguageWeight) { return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords, - additionalFeaturesOptions, 0 /* sessionId */); + additionalFeaturesOptions, 0 /* sessionId */, inOutLanguageWeight); } @Override public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer, final String prevWord, final ProximityInfo proximityInfo, final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, - final int sessionId) { - if (!isValidDictionary()) return null; + final int sessionId, final float[] inOutLanguageWeight) { + if (!isValidDictionary()) { + return null; + } Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE); // TODO: toLowerCase in the native code final int[] prevWordCodePointArray = (null == prevWord) ? null : StringUtils.toCodePointArray(prevWord); - final int composerSize = composer.size(); - + final InputPointers inputPointers = composer.getInputPointers(); final boolean isGesture = composer.isBatchMode(); - if (composerSize <= 1 || !isGesture) { - if (composerSize > MAX_WORD_LENGTH - 1) return null; - for (int i = 0; i < composerSize; i++) { - mInputCodePoints[i] = composer.getCodeAt(i); + final int inputSize; + if (!isGesture) { + inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( + mInputCodePoints); + if (inputSize < 0) { + return null; } + } else { + inputSize = inputPointers.getPointerSize(); } - final InputPointers ips = composer.getInputPointers(); - final int inputSize = isGesture ? ips.getPointerSize() : composerSize; mNativeSuggestOptions.setIsGesture(isGesture); mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions); + if (inOutLanguageWeight != null) { + mInputOutputLanguageWeight[0] = inOutLanguageWeight[0]; + } else { + mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT; + } // proximityInfo and/or prevWordForBigrams may not be null. - final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), - getTraverseSession(sessionId).getSession(), ips.getXCoordinates(), - ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints, - inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(), - prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices, - mOutputTypes, mOutputAutoCommitFirstWordConfidence); + getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), + getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), + inputPointers.getYCoordinates(), inputPointers.getTimes(), + inputPointers.getPointerIds(), mInputCodePoints, inputSize, + mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount, + mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes, + mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight); + if (inOutLanguageWeight != null) { + inOutLanguageWeight[0] = mInputOutputLanguageWeight[0]; + } + final int count = mOutputSuggestionCount[0]; final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); for (int j = 0; j < count; ++j) { final int start = j * MAX_WORD_LENGTH; @@ -235,18 +355,8 @@ public final class BinaryDictionary extends Dictionary { return mNativeDict != 0; } - public static float calcNormalizedScore(final String before, final String after, - final int score) { - return calcNormalizedScoreNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after), score); - } - - public static int editDistance(final String before, final String after) { - if (before == null || after == null) { - throw new IllegalArgumentException(); - } - return editDistanceNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after)); + public int getFormatVersion() { + return getFormatVersionNative(mNativeDict); } @Override @@ -274,23 +384,77 @@ public final class BinaryDictionary extends Dictionary { return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); } - // Add a unigram entry to binary dictionary in native code. - public void addUnigramWord(final String word, final int probability) { + public WordProperty getWordProperty(final String word) { + if (TextUtils.isEmpty(word)) { + return null; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] outCodePoints = new int[MAX_WORD_LENGTH]; + final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; + final int[] outProbabilityInfo = + new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; + final ArrayList<int[]> outBigramTargets = CollectionUtils.newArrayList(); + final ArrayList<int[]> outBigramProbabilityInfo = CollectionUtils.newArrayList(); + final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList(); + final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList(); + getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + return new WordProperty(codePoints, + outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + } + + public static class GetNextWordPropertyResult { + public WordProperty mWordProperty; + public int mNextToken; + + public GetNextWordPropertyResult(final WordProperty wordPreperty, final int nextToken) { + mWordProperty = wordPreperty; + mNextToken = nextToken; + } + } + + /** + * Method to iterate all words in the dictionary for makedict. + * If token is 0, this method newly starts iterating the dictionary. + */ + public GetNextWordPropertyResult getNextWordProperty(final int token) { + final int[] codePoints = new int[MAX_WORD_LENGTH]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints); + final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); + return new GetNextWordPropertyResult(getWordProperty(word), nextToken); + } + + // Add a unigram entry to binary dictionary with unigram attributes in native code. + public void addUnigramWord(final String word, final int probability, + final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, + final boolean isBlacklisted, final int timestamp) { if (TextUtils.isEmpty(word)) { return; } final int[] codePoints = StringUtils.toCodePointArray(word); - addUnigramWordNative(mNativeDict, codePoints, probability); + final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? + StringUtils.toCodePointArray(shortcutTarget) : null; + addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, + shortcutProbability, isNotAWord, isBlacklisted, timestamp); + mHasUpdated = true; } - // Add a bigram entry to binary dictionary in native code. - public void addBigramWords(final String word0, final String word1, final int probability) { + // Add a bigram entry to binary dictionary with timestamp in native code. + public void addBigramWords(final String word0, final String word1, final int probability, + final int timestamp) { if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { return; } final int[] codePoints0 = StringUtils.toCodePointArray(word0); final int[] codePoints1 = StringUtils.toCodePointArray(word1); - addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability); + addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp); + mHasUpdated = true; } // Remove a bigram entry form binary dictionary in native code. @@ -301,21 +465,52 @@ public final class BinaryDictionary extends Dictionary { final int[] codePoints0 = StringUtils.toCodePointArray(word0); final int[] codePoints1 = StringUtils.toCodePointArray(word1); removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); + mHasUpdated = true; + } + + public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { + if (!isValidDictionary()) return; + int processedParamCount = 0; + while (processedParamCount < languageModelParams.length) { + if (needsToRunGC(true /* mindsBlockByGC */)) { + flushWithGC(); + } + processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, + languageModelParams, processedParamCount); + mHasUpdated = true; + if (processedParamCount <= 0) { + return; + } + } } private void reopen() { close(); final File dictFile = new File(mDictFilePath); - mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, - dictFile.length(), true /* isUpdatable */); + // WARNING: Because we pass 0 as the offset and file.length() as the length, this can + // only be called for actual files. Right now it's only called by the flush() family of + // functions, which require an updatable dictionary, so it's okay. But beware. + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); } + // Flush to dict file if the dictionary has been updated. public void flush() { if (!isValidDictionary()) return; - flushNative(mNativeDict, mDictFilePath); - reopen(); + if (mHasUpdated) { + flushNative(mNativeDict, mDictFilePath); + reopen(); + } } + // Run GC and flush to dict file if the dictionary has been updated. + public void flushWithGCIfHasUpdated() { + if (mHasUpdated) { + flushWithGC(); + } + } + + // Run GC and flush to dict file. public void flushWithGC() { if (!isValidDictionary()) return; flushWithGCNative(mNativeDict, mDictFilePath); @@ -333,6 +528,24 @@ public final class BinaryDictionary extends Dictionary { return needsToRunGCNative(mNativeDict, mindsBlockByGC); } + public boolean migrateTo(final int newFormatVersion) { + if (!isValidDictionary()) { + return false; + } + final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; + // TODO: Implement migrateNative(tmpDictFilePath, newFormatVersion). + close(); + final File dictFile = new File(mDictFilePath); + final File tmpDictFile = new File(tmpDictFilePath); + FileUtils.deleteRecursively(dictFile); + if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { + return false; + } + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); + return true; + } + @UsedForTesting public int calculateProbability(final int unigramProbability, final int bigramProbability) { if (!isValidDictionary()) return NOT_A_PROBABILITY; @@ -340,7 +553,7 @@ public final class BinaryDictionary extends Dictionary { } @UsedForTesting - public String getPropertyForTests(String query) { + public String getPropertyForTest(final String query) { if (!isValidDictionary()) return ""; return getPropertyNative(mNativeDict, query); } |