diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 563 |
1 files changed, 398 insertions, 165 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fd296988e..1b5791809 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,19 +17,27 @@ package com.android.inputmethod.latin; import android.text.TextUtils; +import android.util.Log; import android.util.SparseArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.settings.NativeSuggestOptions; -import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; +import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.JniUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.StringUtils; import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -40,10 +48,6 @@ import java.util.Map; public final class BinaryDictionary extends Dictionary { private static final String TAG = BinaryDictionary.class.getSimpleName(); - // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h - private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; - // Must be equal to MAX_RESULTS in native/jni/src/defines.h - private static final int MAX_RESULTS = 18; // The cutoff returned by native for auto-commit confidence. // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; @@ -57,22 +61,34 @@ public final class BinaryDictionary extends Dictionary { @UsedForTesting public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; + public static final int NOT_A_VALID_TIMESTAMP = -1; + + // Format to get unigram flags from native side via getWordPropertyNative(). + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; + private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; + private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; + private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; + private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; + + // Format to get probability and historical info from native side via getWordPropertyNative(). + public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; + public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; + public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; + public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; + public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; + + public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; + private long mNativeDict; private final Locale mLocale; private final long mDictSize; private final String mDictFilePath; - private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; - private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS]; - private final int[] mSpaceIndices = new int[MAX_RESULTS]; - private final int[] mOutputScores = new int[MAX_RESULTS]; - private final int[] mOutputTypes = new int[MAX_RESULTS]; - // Only one result is ever used - private final int[] mOutputAutoCommitFirstWordConfidence = new int[1]; + private final boolean mUseFullEditDistance; + private final boolean mIsUpdatable; + private boolean mHasUpdated; - private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions(); - - private final SparseArray<DicTraverseSession> mDicTraverseSessions = - CollectionUtils.newSparseArray(); + private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>(); // TODO: There should be a way to remove used DicTraverseSession objects from // {@code mDicTraverseSessions}. @@ -80,19 +96,15 @@ public final class BinaryDictionary extends Dictionary { synchronized(mDicTraverseSessions) { DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId); if (traverseSession == null) { - traverseSession = mDicTraverseSessions.get(traverseSessionId); - if (traverseSession == null) { - traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); - mDicTraverseSessions.put(traverseSessionId, traverseSession); - } + traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); + mDicTraverseSessions.put(traverseSessionId, traverseSession); } return traverseSession; } } /** - * Constructor for the binary dictionary. This is supposed to be called from the - * dictionary factory. + * Constructs binary dictionary using existing dictionary file. * @param filename the name of the file to read through native code. * @param offset the offset of the dictionary data within the file. * @param length the length of the binary data. @@ -107,125 +119,201 @@ public final class BinaryDictionary extends Dictionary { mLocale = locale; mDictSize = length; mDictFilePath = filename; - mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance); + mIsUpdatable = isUpdatable; + mHasUpdated = false; + mUseFullEditDistance = useFullEditDistance; loadDictionary(filename, offset, length, isUpdatable); } + /** + * Constructs binary dictionary on memory. + * @param filename the name of the file used to flush. + * @param useFullEditDistance whether to use the full edit distance in suggestions + * @param dictType the dictionary type, as a human-readable string + * @param formatVersion the format version of the dictionary + * @param attributeMap the attributes of the dictionary + */ + public BinaryDictionary(final String filename, final boolean useFullEditDistance, + final Locale locale, final String dictType, final long formatVersion, + final Map<String, String> attributeMap) { + super(dictType); + mLocale = locale; + mDictSize = 0; + mDictFilePath = filename; + // On memory dictionary is always updatable. + mIsUpdatable = true; + mHasUpdated = false; + mUseFullEditDistance = useFullEditDistance; + final String[] keyArray = new String[attributeMap.size()]; + final String[] valueArray = new String[attributeMap.size()]; + int index = 0; + for (final String key : attributeMap.keySet()) { + keyArray[index] = key; + valueArray[index] = attributeMap.get(key); + index++; + } + mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); + } + + static { JniUtils.loadNativeLibrary(); } - private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, - String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); - private static native void flushNative(long dict, String filePath); + private static native long createOnMemoryNative(long formatVersion, + String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); + private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, + int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, + ArrayList<int[]> outAttributeValues); + private static native boolean flushNative(long dict, String filePath); private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); - private static native void flushWithGCNative(long dict, String filePath); + private static native boolean flushWithGCNative(long dict, String filePath); private static native void closeNative(long dict); + private static native int getFormatVersionNative(long dict); private static native int getProbabilityNative(long dict, int[] word); - private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); - private static native int getSuggestionsNative(long dict, long proximityInfo, + private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word); + private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, + boolean[] isBeginningOfSentenceArray, int[] word); + private static native void getWordPropertyNative(long dict, int[] word, + boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, + int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets, + ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets, + ArrayList<Integer> outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints, + boolean[] outIsBeginningOfSentence); + private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, - int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, - int[] suggestOptions, int[] prevWordCodePointArray, - int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes, - int[] outputAutoCommitFirstWordConfidence); - private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); - private static native int editDistanceNative(int[] before, int[] after); - private static native void addUnigramWordNative(long dict, int[] word, int probability); - private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, - int probability); - private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); - private static native int calculateProbabilityNative(long dict, int unigramProbability, - int bigramProbability); + int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, + int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, + int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, + int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, + float[] inOutLanguageWeight); + private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, + int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, + boolean isNotAWord, boolean isBlacklisted, int timestamp); + private static native boolean removeUnigramEntryNative(long dict, int[] word); + private static native boolean addNgramEntryNative(long dict, + int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, + int[] word, int probability, int timestamp); + private static native boolean removeNgramEntryNative(long dict, + int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word); + private static native int addMultipleDictionaryEntriesNative(long dict, + LanguageModelParam[] languageModelParams, int startIndex); private static native String getPropertyNative(long dict, String query); - - @UsedForTesting - public static boolean createEmptyDictFile(final String filePath, final long dictVersion, - final Map<String, String> attributeMap) { - final String[] keyArray = new String[attributeMap.size()]; - final String[] valueArray = new String[attributeMap.size()]; - int index = 0; - for (final String key : attributeMap.keySet()) { - keyArray[index] = key; - valueArray[index] = attributeMap.get(key); - index++; - } - return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); - } + private static native boolean isCorruptedNative(long dict); + private static native boolean migrateNative(long dict, String dictFilePath, + long newFormatVersion); // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable) { + mHasUpdated = false; mNativeDict = openNative(path, startOffset, length, isUpdatable); } - @Override - public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, - final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { - return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords, - additionalFeaturesOptions, 0 /* sessionId */); + // TODO: Check isCorrupted() for main dictionaries. + public boolean isCorrupted() { + if (!isValidDictionary()) { + return false; + } + if (!isCorruptedNative(mNativeDict)) { + return false; + } + // TODO: Record the corruption. + Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); + Log.e(TAG, "locale: " + mLocale); + Log.e(TAG, "dict size: " + mDictSize); + Log.e(TAG, "updatable: " + mIsUpdatable); + return true; + } + + public DictionaryHeader getHeader() throws UnsupportedFormatException { + if (mNativeDict == 0) { + return null; + } + final int[] outHeaderSize = new int[1]; + final int[] outFormatVersion = new int[1]; + final ArrayList<int[]> outAttributeKeys = new ArrayList<>(); + final ArrayList<int[]> outAttributeValues = new ArrayList<>(); + getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, + outAttributeValues); + final HashMap<String, String> attributes = new HashMap<>(); + for (int i = 0; i < outAttributeKeys.size(); i++) { + final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeKeys.get(i)); + final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeValues.get(i)); + attributes.put(attributeKey, attributeValue); + } + final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); + return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), + new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); } @Override - public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, + public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, + final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, - final int sessionId) { - if (!isValidDictionary()) return null; - - Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE); - // TODO: toLowerCase in the native code - final int[] prevWordCodePointArray = (null == prevWord) - ? null : StringUtils.toCodePointArray(prevWord); - final int composerSize = composer.size(); - + final int sessionId, final float[] inOutLanguageWeight) { + if (!isValidDictionary()) { + return null; + } + final DicTraverseSession session = getTraverseSession(sessionId); + Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); + prevWordsInfo.outputToArray(session.mPrevWordCodePointArrays, + session.mIsBeginningOfSentenceArray); + final InputPointers inputPointers = composer.getInputPointers(); final boolean isGesture = composer.isBatchMode(); - if (composerSize <= 1 || !isGesture) { - if (composerSize > MAX_WORD_LENGTH - 1) return null; - for (int i = 0; i < composerSize; i++) { - mInputCodePoints[i] = composer.getCodeAt(i); + final int inputSize; + if (!isGesture) { + inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( + session.mInputCodePoints); + if (inputSize < 0) { + return null; } + } else { + inputSize = inputPointers.getPointerSize(); } - - final InputPointers ips = composer.getInputPointers(); - final int inputSize = isGesture ? ips.getPointerSize() : composerSize; - mNativeSuggestOptions.setIsGesture(isGesture); - mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions); - // proximityInfo and/or prevWordForBigrams may not be null. - final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), - getTraverseSession(sessionId).getSession(), ips.getXCoordinates(), - ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints, - inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(), - prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices, - mOutputTypes, mOutputAutoCommitFirstWordConfidence); - final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); + session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance); + session.mNativeSuggestOptions.setIsGesture(isGesture); + session.mNativeSuggestOptions.setBlockOffensiveWords(blockOffensiveWords); + session.mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions); + if (inOutLanguageWeight != null) { + session.mInputOutputLanguageWeight[0] = inOutLanguageWeight[0]; + } else { + session.mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT; + } + // TOOD: Pass multiple previous words information for n-gram. + getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), + getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), + inputPointers.getYCoordinates(), inputPointers.getTimes(), + inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, + session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, + session.mIsBeginningOfSentenceArray, session.mOutputSuggestionCount, + session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices, + session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence, + session.mInputOutputLanguageWeight); + if (inOutLanguageWeight != null) { + inOutLanguageWeight[0] = session.mInputOutputLanguageWeight[0]; + } + final int count = session.mOutputSuggestionCount[0]; + final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); for (int j = 0; j < count; ++j) { - final int start = j * MAX_WORD_LENGTH; + final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH; int len = 0; - while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) { + while (len < Constants.DICTIONARY_MAX_WORD_LENGTH + && session.mOutputCodePoints[start + len] != 0) { ++len; } if (len > 0) { - final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS; - if (blockOffensiveWords - && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE) - && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) { - // If we block potentially offensive words, and if the word is possibly - // offensive, then we don't output it unless it's also an exact match. - continue; - } - final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND; - final int score = SuggestedWordInfo.KIND_WHITELIST == kind - ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j]; - // TODO: check that all users of the `kind' parameter are ready to accept - // flags too and pass mOutputTypes[j] instead of kind - suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len), - score, kind, this /* sourceDict */, - mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, - mOutputAutoCommitFirstWordConfidence[0])); + suggestions.add(new SuggestedWordInfo( + new String(session.mOutputCodePoints, start, len), + session.mOutputScores[j], session.mOutputTypes[j], this /* sourceDict */, + session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, + session.mOutputAutoCommitFirstWordConfidence[0])); } } return suggestions; @@ -235,91 +323,220 @@ public final class BinaryDictionary extends Dictionary { return mNativeDict != 0; } - public static float calcNormalizedScore(final String before, final String after, - final int score) { - return calcNormalizedScoreNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after), score); - } - - public static int editDistance(final String before, final String after) { - if (before == null || after == null) { - throw new IllegalArgumentException(); - } - return editDistanceNative(StringUtils.toCodePointArray(before), - StringUtils.toCodePointArray(after)); + public int getFormatVersion() { + return getFormatVersionNative(mNativeDict); } @Override - public boolean isValidWord(final String word) { + public boolean isInDictionary(final String word) { return getFrequency(word) != NOT_A_PROBABILITY; } @Override public int getFrequency(final String word) { - if (word == null) return NOT_A_PROBABILITY; + if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; int[] codePoints = StringUtils.toCodePointArray(word); return getProbabilityNative(mNativeDict, codePoints); } - // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni - // calls when checking for changes in an entire dictionary. - public boolean isValidBigram(final String word0, final String word1) { - return getBigramProbability(word0, word1) != NOT_A_PROBABILITY; + @Override + public int getMaxFrequencyOfExactMatches(final String word) { + if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; + int[] codePoints = StringUtils.toCodePointArray(word); + return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints); + } + + @UsedForTesting + public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) { + return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY; } - public int getBigramProbability(final String word0, final String word1) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY; - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); + public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return NOT_A_PROBABILITY; + } + final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; + final boolean[] isBeginningOfSentenceArray = + new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[] wordCodePoints = StringUtils.toCodePointArray(word); + return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, + isBeginningOfSentenceArray, wordCodePoints); } - // Add a unigram entry to binary dictionary in native code. - public void addUnigramWord(final String word, final int probability) { + public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { + if (word == null) { + return null; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; + final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; + final int[] outProbabilityInfo = + new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; + final ArrayList<int[]> outBigramTargets = new ArrayList<>(); + final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>(); + final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); + final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); + getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, + outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, + outShortcutTargets, outShortcutProbabilities); + return new WordProperty(codePoints, + outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); + } + + public static class GetNextWordPropertyResult { + public WordProperty mWordProperty; + public int mNextToken; + + public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) { + mWordProperty = wordProperty; + mNextToken = nextToken; + } + } + + /** + * Method to iterate all words in the dictionary for makedict. + * If token is 0, this method newly starts iterating the dictionary. + */ + public GetNextWordPropertyResult getNextWordProperty(final int token) { + final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; + final boolean[] isBeginningOfSentence = new boolean[1]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints, + isBeginningOfSentence); + final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); + return new GetNextWordPropertyResult( + getWordProperty(word, isBeginningOfSentence[0]), nextToken); + } + + // Add a unigram entry to binary dictionary with unigram attributes in native code. + public boolean addUnigramEntry(final String word, final int probability, + final String shortcutTarget, final int shortcutProbability, + final boolean isBeginningOfSentence, final boolean isNotAWord, + final boolean isBlacklisted, final int timestamp) { + if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { + return false; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? + StringUtils.toCodePointArray(shortcutTarget) : null; + if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, + shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) { + return false; + } + mHasUpdated = true; + return true; + } + + // Remove a unigram entry from the binary dictionary in native code. + public boolean removeUnigramEntry(final String word) { if (TextUtils.isEmpty(word)) { - return; + return false; } final int[] codePoints = StringUtils.toCodePointArray(word); - addUnigramWordNative(mNativeDict, codePoints, probability); + if (!removeUnigramEntryNative(mNativeDict, codePoints)) { + return false; + } + mHasUpdated = true; + return true; } - // Add a bigram entry to binary dictionary in native code. - public void addBigramWords(final String word0, final String word1, final int probability) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { - return; + // Add an n-gram entry to the binary dictionary with timestamp in native code. + public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, + final int probability, final int timestamp) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return false; } - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability); + final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; + final boolean[] isBeginningOfSentenceArray = + new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[] wordCodePoints = StringUtils.toCodePointArray(word); + if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, + isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { + return false; + } + mHasUpdated = true; + return true; } - // Remove a bigram entry form binary dictionary in native code. - public void removeBigramWords(final String word0, final String word1) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { - return; + // Remove an n-gram entry from the binary dictionary in native code. + public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { + if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + return false; + } + final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; + final boolean[] isBeginningOfSentenceArray = + new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[] wordCodePoints = StringUtils.toCodePointArray(word); + if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays, + isBeginningOfSentenceArray, wordCodePoints)) { + return false; + } + mHasUpdated = true; + return true; + } + + public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { + if (!isValidDictionary()) return; + int processedParamCount = 0; + while (processedParamCount < languageModelParams.length) { + if (needsToRunGC(true /* mindsBlockByGC */)) { + flushWithGC(); + } + processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, + languageModelParams, processedParamCount); + mHasUpdated = true; + if (processedParamCount <= 0) { + return; + } } - final int[] codePoints0 = StringUtils.toCodePointArray(word0); - final int[] codePoints1 = StringUtils.toCodePointArray(word1); - removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); } private void reopen() { close(); final File dictFile = new File(mDictFilePath); - mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, - dictFile.length(), true /* isUpdatable */); + // WARNING: Because we pass 0 as the offset and file.length() as the length, this can + // only be called for actual files. Right now it's only called by the flush() family of + // functions, which require an updatable dictionary, so it's okay. But beware. + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); } - public void flush() { - if (!isValidDictionary()) return; - flushNative(mNativeDict, mDictFilePath); - reopen(); + // Flush to dict file if the dictionary has been updated. + public boolean flush() { + if (!isValidDictionary()) return false; + if (mHasUpdated) { + if (!flushNative(mNativeDict, mDictFilePath)) { + return false; + } + reopen(); + } + return true; } - public void flushWithGC() { - if (!isValidDictionary()) return; - flushWithGCNative(mNativeDict, mDictFilePath); + // Run GC and flush to dict file if the dictionary has been updated. + public boolean flushWithGCIfHasUpdated() { + if (mHasUpdated) { + return flushWithGC(); + } + return true; + } + + // Run GC and flush to dict file. + public boolean flushWithGC() { + if (!isValidDictionary()) return false; + if (!flushWithGCNative(mNativeDict, mDictFilePath)) { + return false; + } reopen(); + return true; } /** @@ -333,14 +550,30 @@ public final class BinaryDictionary extends Dictionary { return needsToRunGCNative(mNativeDict, mindsBlockByGC); } - @UsedForTesting - public int calculateProbability(final int unigramProbability, final int bigramProbability) { - if (!isValidDictionary()) return NOT_A_PROBABILITY; - return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability); + public boolean migrateTo(final int newFormatVersion) { + if (!isValidDictionary()) { + return false; + } + final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; + if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) { + return false; + } + close(); + final File dictFile = new File(mDictFilePath); + final File tmpDictFile = new File(tmpDictFilePath); + if (!FileUtils.deleteRecursively(dictFile)) { + return false; + } + if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { + return false; + } + loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, + dictFile.length(), mIsUpdatable); + return true; } @UsedForTesting - public String getPropertyForTests(String query) { + public String getPropertyForTest(final String query) { if (!isValidDictionary()) return ""; return getPropertyNative(mNativeDict, query); } |