diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/BinaryDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 136 |
1 files changed, 84 insertions, 52 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 693e1cdcc..2fece7c85 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -42,6 +42,8 @@ import java.util.HashMap; import java.util.Locale; import java.util.Map; +import javax.annotation.Nonnull; + /** * Implements a static, compacted, binary dictionary of standard words. */ @@ -68,7 +70,7 @@ public final class BinaryDictionary extends Dictionary { private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; - private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; + private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2; private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; @@ -83,7 +85,6 @@ public final class BinaryDictionary extends Dictionary { public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating"; private long mNativeDict; - private final Locale mLocale; private final long mDictSize; private final String mDictFilePath; private final boolean mUseFullEditDistance; @@ -117,8 +118,7 @@ public final class BinaryDictionary extends Dictionary { public BinaryDictionary(final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale, final String dictType, final boolean isUpdatable) { - super(dictType); - mLocale = locale; + super(dictType, locale); mDictSize = length; mDictFilePath = filename; mIsUpdatable = isUpdatable; @@ -138,8 +138,7 @@ public final class BinaryDictionary extends Dictionary { public BinaryDictionary(final String filename, final boolean useFullEditDistance, final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap) { - super(dictType); - mLocale = locale; + super(dictType, locale); mDictSize = 0; mDictFilePath = filename; // On memory dictionary is always updatable. @@ -180,18 +179,20 @@ public final class BinaryDictionary extends Dictionary { boolean[] isBeginningOfSentenceArray, int[] word); private static native void getWordPropertyNative(long dict, int[] word, boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, - int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets, - ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets, - ArrayList<Integer> outShortcutProbabilities); + int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray, + ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, + ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo, + ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); private static native int getNextWordNative(long dict, int token, int[] outCodePoints, boolean[] outIsBeginningOfSentence); private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, - int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, - int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, - float[] inOutLanguageWeight); + int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints, + int[] outputScores, int[] outputIndices, int[] outputTypes, + int[] outputAutoCommitFirstWordConfidence, + float[] inOutWeightOfLangModelVsSpatialModel); private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, boolean isNotAWord, boolean isBlacklisted, int timestamp); @@ -201,6 +202,9 @@ public final class BinaryDictionary extends Dictionary { int[] word, int probability, int timestamp); private static native boolean removeNgramEntryNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word); + private static native boolean updateEntriesForWordWithNgramContextNative(long dict, + int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, + int[] word, boolean isValidWord, int count, int timestamp); private static native int addMultipleDictionaryEntriesNative(long dict, LanguageModelParam[] languageModelParams, int startIndex); private static native String getPropertyNative(long dict, String query); @@ -257,15 +261,16 @@ public final class BinaryDictionary extends Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, - final int sessionId, final float[] inOutLanguageWeight) { + final int sessionId, final float weightForLocale, + final float[] inOutWeightOfLangModelVsSpatialModel) { if (!isValidDictionary()) { return null; } final DicTraverseSession session = getTraverseSession(sessionId); Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); - prevWordsInfo.outputToArray(session.mPrevWordCodePointArrays, + ngramContext.outputToArray(session.mPrevWordCodePointArrays, session.mIsBeginningOfSentenceArray); final InputPointers inputPointers = composer.getInputPointers(); final boolean isGesture = composer.isBatchMode(); @@ -287,10 +292,13 @@ public final class BinaryDictionary extends Dictionary { settingsValuesForSuggestion.mSpaceAwareGestureEnabled); session.mNativeSuggestOptions.setAdditionalFeaturesOptions( settingsValuesForSuggestion.mAdditionalFeaturesSettingValues); - if (inOutLanguageWeight != null) { - session.mInputOutputLanguageWeight[0] = inOutLanguageWeight[0]; + session.mNativeSuggestOptions.setWeightForLocale(weightForLocale); + if (inOutWeightOfLangModelVsSpatialModel != null) { + session.mInputOutputWeightOfLangModelVsSpatialModel[0] = + inOutWeightOfLangModelVsSpatialModel[0]; } else { - session.mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT; + session.mInputOutputWeightOfLangModelVsSpatialModel[0] = + Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL; } // TOOD: Pass multiple previous words information for n-gram. getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), @@ -298,12 +306,14 @@ public final class BinaryDictionary extends Dictionary { inputPointers.getYCoordinates(), inputPointers.getTimes(), inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, - session.mIsBeginningOfSentenceArray, session.mOutputSuggestionCount, - session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices, - session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence, - session.mInputOutputLanguageWeight); - if (inOutLanguageWeight != null) { - inOutLanguageWeight[0] = session.mInputOutputLanguageWeight[0]; + session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(), + session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores, + session.mSpaceIndices, session.mOutputTypes, + session.mOutputAutoCommitFirstWordConfidence, + session.mInputOutputWeightOfLangModelVsSpatialModel); + if (inOutWeightOfLangModelVsSpatialModel != null) { + inOutWeightOfLangModelVsSpatialModel[0] = + session.mInputOutputWeightOfLangModelVsSpatialModel[0]; } final int count = session.mOutputSuggestionCount[0]; final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); @@ -317,7 +327,8 @@ public final class BinaryDictionary extends Dictionary { if (len > 0) { suggestions.add(new SuggestedWordInfo( new String(session.mOutputCodePoints, start, len), - session.mOutputScores[j], session.mOutputTypes[j], this /* sourceDict */, + (int)(session.mOutputScores[j] * weightForLocale), session.mOutputTypes[j], + this /* sourceDict */, session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, session.mOutputAutoCommitFirstWordConfidence[0])); } @@ -353,18 +364,17 @@ public final class BinaryDictionary extends Dictionary { } @UsedForTesting - public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) { - return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY; + public boolean isValidNgram(final NgramContext ngramContext, final String word) { + return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY; } - public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + public int getNgramProbability(final NgramContext ngramContext, final String word) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return NOT_A_PROBABILITY; } - final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; - final boolean[] isBeginningOfSentenceArray = - new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints); @@ -379,20 +389,25 @@ public final class BinaryDictionary extends Dictionary { final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; final int[] outProbabilityInfo = new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; - final ArrayList<int[]> outBigramTargets = new ArrayList<>(); - final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>(); + final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>(); + final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray = + new ArrayList<>(); + final ArrayList<int[]> outNgramTargets = new ArrayList<>(); + final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>(); final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, - outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, - outShortcutTargets, outShortcutProbabilities); + outFlags, outProbabilityInfo, outNgramPrevWordsArray, + outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets, + outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); return new WordProperty(codePoints, outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], - outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX], outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, - outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, + outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); } @@ -453,15 +468,14 @@ public final class BinaryDictionary extends Dictionary { } // Add an n-gram entry to the binary dictionary with timestamp in native code. - public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, + public boolean addNgramEntry(final NgramContext ngramContext, final String word, final int probability, final int timestamp) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return false; } - final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; - final boolean[] isBeginningOfSentenceArray = - new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { @@ -472,14 +486,13 @@ public final class BinaryDictionary extends Dictionary { } // Remove an n-gram entry from the binary dictionary in native code. - public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + public boolean removeNgramEntry(final NgramContext ngramContext, final String word) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return false; } - final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; - final boolean[] isBeginningOfSentenceArray = - new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints)) { @@ -489,6 +502,25 @@ public final class BinaryDictionary extends Dictionary { return true; } + // Update entries for the word occurrence with the ngramContext. + public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext, + final String word, final boolean isValidWord, final int count, final int timestamp) { + if (TextUtils.isEmpty(word)) { + return false; + } + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[] wordCodePoints = StringUtils.toCodePointArray(word); + if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays, + isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) { + return false; + } + mHasUpdated = true; + return true; + } + + @UsedForTesting public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { if (!isValidDictionary()) return; int processedParamCount = 0; @@ -596,7 +628,7 @@ public final class BinaryDictionary extends Dictionary { } @UsedForTesting - public String getPropertyForTest(final String query) { + public String getPropertyForGettingStats(final String query) { if (!isValidDictionary()) return ""; return getPropertyNative(mNativeDict, query); } |