diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 498 |
1 files changed, 361 insertions, 137 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fd296988e..1d087439d 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,19 +17,28 @@ package com.android.inputmethod.latin; import android.text.TextUtils; +import android.util.Log; import android.util.SparseArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.settings.NativeSuggestOptions; -import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; +import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.JniUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.StringUtils; import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -57,22 +66,44 @@ public final class BinaryDictionary extends Dictionary { @UsedForTesting public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; + public static final int NOT_A_VALID_TIMESTAMP = -1; + + // Format to get unigram flags from native side via getWordPropertyNative(). + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4; + private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; + private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; + private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; + private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + + // Format to get probability and historical info from native side via getWordPropertyNative(). + public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; + public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; + public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; + public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; + public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; + + public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; + private long mNativeDict; private final Locale mLocale; private final long mDictSize; private final String mDictFilePath; + private final boolean mIsUpdatable; + private boolean mHasUpdated; + private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; + private final int[] mOutputSuggestionCount = new int[1]; private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS]; private final int[] mSpaceIndices = new int[MAX_RESULTS]; private final int[] mOutputScores = new int[MAX_RESULTS]; private final int[] mOutputTypes = new int[MAX_RESULTS]; // Only one result is ever used private final int[] mOutputAutoCommitFirstWordConfidence = new int[1]; + private final float[] mInputOutputLanguageWeight = new float[1]; private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions(); - private final SparseArray<DicTraverseSession> mDicTraverseSessions = - CollectionUtils.newSparseArray(); + private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>(); // TODO: There should be a way to remove used DicTraverseSession objects from // {@code mDicTraverseSessions}. @@ -80,19 +111,15 @@ public final class BinaryDictionary extends Dictionary { synchronized(mDicTraverseSessions) { DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId); if (traverseSession == null) { - traverseSession = mDicTraverseSessions.get(traverseSessionId); - if (traverseSession == null) { - traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); - mDicTraverseSessions.put(traverseSessionId, traverseSession); - } + traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); + mDicTraverseSessions.put(traverseSessionId, traverseSession); } return traverseSession; } } /** - * Constructor for the binary dictionary. This is supposed to be called from the - * dictionary factory. + * Constructs binary dictionary using existing dictionary file. * @param filename the name of the file to read through native code. * @param offset the offset of the dictionary data within the file. * @param length the length of the binary data. @@ -107,43 +134,32 @@ public final class BinaryDictionary extends Dictionary { mLocale = locale; mDictSize = length; mDictFilePath = filename; + mIsUpdatable = isUpdatable; + mHasUpdated = false; mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); loadDictionary(filename, offset, length, isUpdatable); } - static { - JniUtils.loadNativeLibrary(); - } - - private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, - String[] attributeKeyStringArray, String[] attributeValueStringArray); - private static native long openNative(String sourceDir, long dictOffset, long dictSize, - boolean isUpdatable); - private static native void flushNative(long dict, String filePath); - private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); - private static native void flushWithGCNative(long dict, String filePath); - private static native void closeNative(long dict); - private static native int getProbabilityNative(long dict, int[] word); - private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); - private static native int getSuggestionsNative(long dict, long proximityInfo, - long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, - int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, - int[] suggestOptions, int[] prevWordCodePointArray, - int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes, - int[] outputAutoCommitFirstWordConfidence); - private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); - private static native int editDistanceNative(int[] before, int[] after); - private static native void addUnigramWordNative(long dict, int[] word, int probability); - private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, - int probability); - private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); - private static native int calculateProbabilityNative(long dict, int unigramProbability, - int bigramProbability); - private static native String getPropertyNative(long dict, String query); - + /** + * Constructs binary dictionary on memory. + * @param filename the name of the file used to flush. + * @param useFullEditDistance whether to use the full edit distance in suggestions + * @param dictType the dictionary type, as a human-readable string + * @param formatVersion the format version of the dictionary + * @param attributeMap the attributes of the dictionary + */ @UsedForTesting - public static boolean createEmptyDictFile(final String filePath, final long dictVersion, + public BinaryDictionary(final String filename, final boolean useFullEditDistance, + final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap) { + super(dictType); + mLocale = locale; + mDictSize = 0; + mDictFilePath = filename; + // On memory dictionary is always updatable. + mIsUpdatable = true; + mHasUpdated = false; + mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); final String[] keyArray = new String[attributeMap.size()]; final String[] valueArray = new String[attributeMap.size()]; int index = 0; @@ -152,56 +168,150 @@ public final class BinaryDictionary extends Dictionary { valueArray[index] = attributeMap.get(key); index++; } - return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); + mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); } + + static { + JniUtils.loadNativeLibrary(); + } + + private static native long openNative(String sourceDir, long dictOffset, long dictSize, + boolean isUpdatable); + private static native long createOnMemoryNative(long formatVersion, + String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); + private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, + int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, + ArrayList<int[]> outAttributeValues); + private static native boolean flushNative(long dict, String filePath); + private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); + private static native boolean flushWithGCNative(long dict, String filePath); + private static native void closeNative(long dict); + private static native int getFormatVersionNative(long dict); + private static native int getProbabilityNative(long dict, int[] word); + private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word); + private static native int getBigramProbabilityNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1); + private static native void getWordPropertyNative(long dict, int[] word, + int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, + ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, + ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints); + private static native void getSuggestionsNative(long dict, long proximityInfo, + long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, + int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, + int[] prevWordCodePointArray, boolean isBeginningOfSentence, + int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, + int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, + float[] inOutLanguageWeight); + private static native boolean addUnigramWordNative(long dict, int[] word, int probability, + int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, + boolean isNotAWord, boolean isBlacklisted, int timestamp); + private static native boolean addBigramWordsNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1, int probability, int timestamp); + private static native boolean removeBigramWordsNative(long dict, int[] word0, + boolean isBeginningOfSentence, int[] word1); + private static native int addMultipleDictionaryEntriesNative(long dict, + LanguageModelParam[] languageModelParams, int startIndex); + private static native String getPropertyNative(long dict, String query); + private static native boolean isCorruptedNative(long dict); + private static native boolean migrateNative(long dict, String dictFilePath, + long newFormatVersion); + // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable) { + mHasUpdated = false; mNativeDict = openNative(path, startOffset, length, isUpdatable); } - @Override - public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, - final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { - return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords, - additionalFeaturesOptions, 0 /* sessionId */); + // TODO: Check isCorrupted() for main dictionaries. + public boolean isCorrupted() { + if (!isValidDictionary()) { + return false; + } + if (!isCorruptedNative(mNativeDict)) { + return false; + } + // TODO: Record the corruption. + Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); + Log.e(TAG, "locale: " + mLocale); + Log.e(TAG, "dict size: " + mDictSize); + Log.e(TAG, "updatable: " + mIsUpdatable); + return true; + } + + public DictionaryHeader getHeader() throws UnsupportedFormatException { + if (mNativeDict == 0) { + return null; + } + final int[] outHeaderSize = new int[1]; + final int[] outFormatVersion = new int[1]; + final ArrayList<int[]> outAttributeKeys = new ArrayList<>(); + final ArrayList<int[]> outAttributeValues = new ArrayList<>(); + getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, + outAttributeValues); + final HashMap<String, String> attributes = new HashMap<>(); + for (int i = 0; i < outAttributeKeys.size(); i++) { + final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeKeys.get(i)); + final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeValues.get(i)); + attributes.put(attributeKey, attributeValue); + } + final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); + return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), + new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); } @Override - public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, + public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, + final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, - final int sessionId) { - if (!isValidDictionary()) return null; + final int sessionId, final float[] inOutLanguageWeight) { + if (!isValidDictionary()) { + return null; + } Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE); // TODO: toLowerCase in the native code - final int[] prevWordCodePointArray = (null == prevWord) - ? null : StringUtils.toCodePointArray(prevWord); - final int composerSize = composer.size(); - + final int[] prevWordCodePointArray = (null == prevWordsInfo.mPrevWord) + ? null : StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); + final InputPointers inputPointers = composer.getInputPointers(); final boolean isGesture = composer.isBatchMode(); - if (composerSize <= 1 || !isGesture) { - if (composerSize > MAX_WORD_LENGTH - 1) return null; - for (int i = 0; i < composerSize; i++) { - mInputCodePoints[i] = composer.getCodeAt(i); + final int inputSize; + if (!isGesture) { + inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( + mInputCodePoints); + if (inputSize < 0) { + return null; } + } else { + inputSize = inputPointers.getPointerSize(); } - final InputPointers ips = composer.getInputPointers(); - final int inputSize = isGesture ? ips.getPointerSize() : composerSize; mNativeSuggestOptions.setIsGesture(isGesture); mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions); + if (inOutLanguageWeight != null) { + mInputOutputLanguageWeight[0] = inOutLanguageWeight[0]; + } else { + mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT; + } // proximityInfo and/or prevWordForBigrams may not be null. - final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), - getTraverseSession(sessionId).getSession(), ips.getXCoordinates(), - ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints, - inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(), - prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices, - mOutputTypes, mOutputAutoCommitFirstWordConfidence); - final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); + getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), + getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), + inputPointers.getYCoordinates(), inputPointers.getTimes(), + inputPointers.getPointerIds(), mInputCodePoints, inputSize, + mNativeSuggestOptions.getOptions(), prevWordCodePointArray, + prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount, + mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes, + mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight); + if (inOutLanguageWeight != null) { + inOutLanguageWeight[0] = mInputOutputLanguageWeight[0]; + } + final int count = mOutputSuggestionCount[0]; + final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); for (int j = 0; j < count; ++j) { final int start = j * MAX_WORD_LENGTH; int len = 0; @@ -209,23 +319,18 @@ public final class BinaryDictionary extends Dictionary { ++len; } if (len > 0) { - final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS; - if (blockOffensiveWords - && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE) - && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) { + final SuggestedWordInfo suggestedWordInfo = + new SuggestedWordInfo(new String(mOutputCodePoints, start, len), + mOutputScores[j], mOutputTypes[j], this /* sourceDict */, + mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, + mOutputAutoCommitFirstWordConfidence[0]); + if (blockOffensiveWords && suggestedWordInfo.isPossiblyOffensive() + && !suggestedWordInfo.isExactMatch()) { // If we block potentially offensive words, and if the word is possibly // offensive, then we don't output it unless it's also an exact match. continue; } - final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND; - final int score = SuggestedWordInfo.KIND_WHITELIST == kind - ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j]; - // TODO: check that all users of the `kind' parameter are ready to accept - // flags too and pass mOutputTypes[j] instead of kind - suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len), - score, kind, this /* sourceDict */, - mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, - mOutputAutoCommitFirstWordConfidence[0])); + suggestions.add(suggestedWordInfo); } } return suggestions; @@ -235,91 +340,194 @@ public final class BinaryDictionary extends Dictionary { return mNativeDict != 0; } - public static float calcNormalizedScore(final String before, final String after, - final int score) { - return calcNormalizedScoreNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after), score); - } - - public static int editDistance(final String before, final String after) { - if (before == null || after == null) { - throw new IllegalArgumentException(); - } - return editDistanceNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after)); + public int getFormatVersion() { + return getFormatVersionNative(mNativeDict); } @Override - public boolean isValidWord(final String word) { + public boolean isInDictionary(final String word) { return getFrequency(word) != NOT_A_PROBABILITY; } @Override public int getFrequency(final String word) { - if (word == null) return NOT_A_PROBABILITY; + if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; int[] codePoints = StringUtils.toCodePointArray(word); return getProbabilityNative(mNativeDict, codePoints); } - // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni - // calls when checking for changes in an entire dictionary. - public boolean isValidBigram(final String word0, final String word1) { - return getBigramProbability(word0, word1) != NOT_A_PROBABILITY; + @Override + public int getMaxFrequencyOfExactMatches(final String word) { + if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; + int[] codePoints = StringUtils.toCodePointArray(word); + return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints); } - public int getBigramProbability(final String word0, final String word1) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY; - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); + @UsedForTesting + public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) { + return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY; + } + + public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return NOT_A_PROBABILITY; + } + final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); + final int[] codePoints1 = StringUtils.toCodePointArray(word); + return getBigramProbabilityNative(mNativeDict, codePoints0, + prevWordsInfo.mIsBeginningOfSentence, codePoints1); } - // Add a unigram entry to binary dictionary in native code. - public void addUnigramWord(final String word, final int probability) { + public WordProperty getWordProperty(final String word) { if (TextUtils.isEmpty(word)) { - return; + return null; } final int[] codePoints = StringUtils.toCodePointArray(word); - addUnigramWordNative(mNativeDict, codePoints, probability); + final int[] outCodePoints = new int[MAX_WORD_LENGTH]; + final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; + final int[] outProbabilityInfo = + new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; + final ArrayList<int[]> outBigramTargets = new ArrayList<>(); + final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>(); + final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); + final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); + getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + return new WordProperty(codePoints, + outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + } + + public static class GetNextWordPropertyResult { + public WordProperty mWordProperty; + public int mNextToken; + + public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) { + mWordProperty = wordProperty; + mNextToken = nextToken; + } + } + + /** + * Method to iterate all words in the dictionary for makedict. + * If token is 0, this method newly starts iterating the dictionary. + */ + public GetNextWordPropertyResult getNextWordProperty(final int token) { + final int[] codePoints = new int[MAX_WORD_LENGTH]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints); + final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); + return new GetNextWordPropertyResult(getWordProperty(word), nextToken); + } + + // Add a unigram entry to binary dictionary with unigram attributes in native code. + public boolean addUnigramEntry(final String word, final int probability, + final String shortcutTarget, final int shortcutProbability, + final boolean isBeginningOfSentence, final boolean isNotAWord, + final boolean isBlacklisted, final int timestamp) { + if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { + return false; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? + StringUtils.toCodePointArray(shortcutTarget) : null; + if (!addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, + shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) { + return false; + } + mHasUpdated = true; + return true; + } + + // Add an n-gram entry to the binary dictionary with timestamp in native code. + public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, + final int probability, final int timestamp) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return false; + } + final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); + final int[] codePoints1 = StringUtils.toCodePointArray(word); + if (!addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence, + codePoints1, probability, timestamp)) { + return false; + } + mHasUpdated = true; + return true; } - // Add a bigram entry to binary dictionary in native code. - public void addBigramWords(final String word0, final String word1, final int probability) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { - return; + // Remove an n-gram entry from the binary dictionary in native code. + public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return false; + } + final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); + final int[] codePoints1 = StringUtils.toCodePointArray(word); + if (!removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence, + codePoints1)) { + return false; } - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability); + mHasUpdated = true; + return true; } - // Remove a bigram entry form binary dictionary in native code. - public void removeBigramWords(final String word0, final String word1) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { - return; + public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { + if (!isValidDictionary()) return; + int processedParamCount = 0; + while (processedParamCount < languageModelParams.length) { + if (needsToRunGC(true /* mindsBlockByGC */)) { + flushWithGC(); + } + processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, + languageModelParams, processedParamCount); + mHasUpdated = true; + if (processedParamCount <= 0) { + return; + } } - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); } private void reopen() { close(); final File dictFile = new File(mDictFilePath); - mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, - dictFile.length(), true /* isUpdatable */); + // WARNING: Because we pass 0 as the offset and file.length() as the length, this can + // only be called for actual files. Right now it's only called by the flush() family of + // functions, which require an updatable dictionary, so it's okay. But beware. + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); } - public void flush() { - if (!isValidDictionary()) return; - flushNative(mNativeDict, mDictFilePath); - reopen(); + // Flush to dict file if the dictionary has been updated. + public boolean flush() { + if (!isValidDictionary()) return false; + if (mHasUpdated) { + if (!flushNative(mNativeDict, mDictFilePath)) { + return false; + } + reopen(); + } + return true; } - public void flushWithGC() { - if (!isValidDictionary()) return; - flushWithGCNative(mNativeDict, mDictFilePath); + // Run GC and flush to dict file if the dictionary has been updated. + public boolean flushWithGCIfHasUpdated() { + if (mHasUpdated) { + return flushWithGC(); + } + return true; + } + + // Run GC and flush to dict file. + public boolean flushWithGC() { + if (!isValidDictionary()) return false; + if (!flushWithGCNative(mNativeDict, mDictFilePath)) { + return false; + } reopen(); + return true; } /** @@ -333,14 +541,30 @@ public final class BinaryDictionary extends Dictionary { return needsToRunGCNative(mNativeDict, mindsBlockByGC); } - @UsedForTesting - public int calculateProbability(final int unigramProbability, final int bigramProbability) { - if (!isValidDictionary()) return NOT_A_PROBABILITY; - return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability); + public boolean migrateTo(final int newFormatVersion) { + if (!isValidDictionary()) { + return false; + } + final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; + if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) { + return false; + } + close(); + final File dictFile = new File(mDictFilePath); + final File tmpDictFile = new File(tmpDictFilePath); + if (!FileUtils.deleteRecursively(dictFile)) { + return false; + } + if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { + return false; + } + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); + return true; } @UsedForTesting - public String getPropertyForTests(String query) { + public String getPropertyForTest(final String query) { if (!isValidDictionary()) return ""; return getPropertyNative(mNativeDict, query); } |