diff options
82 files changed, 868 insertions, 476 deletions
diff --git a/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java b/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java index ad34dc2d2..fcaeca827 100644 --- a/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java +++ b/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java @@ -79,4 +79,7 @@ public final class StatsUtils { public static void onWordCommitSuggestionPickedManually( final String commitWord, final boolean isBatchMode) { } + + public static void onLoadSettings(SettingsValues settingsValues) { + } } diff --git a/java/proguard.flags b/java/proguard.flags index 35b3ac3d3..0c5ad9fe8 100644 --- a/java/proguard.flags +++ b/java/proguard.flags @@ -19,6 +19,6 @@ # to preserve changing those methods' signature. -keep class com.android.inputmethod.latin.AssetFileAddress -keep class com.android.inputmethod.latin.Dictionary --keep class com.android.inputmethod.latin.PrevWordsInfo +-keep class com.android.inputmethod.latin.NgramContext -keep class com.android.inputmethod.latin.makedict.ProbabilityInfo -keep class com.android.inputmethod.latin.utils.LanguageModelParam diff --git a/java/res/values/strings-talkback-descriptions.xml b/java/res/values/strings-talkback-descriptions.xml index 80c7bdbf0..36fa7b3b4 100644 --- a/java/res/values/strings-talkback-descriptions.xml +++ b/java/res/values/strings-talkback-descriptions.xml @@ -139,6 +139,18 @@ <string name="spoken_symbol_unknown">Unknown symbol</string> <!-- Spoken description for unknown emoji code point. --> <string name="spoken_emoji_unknown">Unknown emoji</string> + <!-- Spoken description for emoticons ":-!". --> + <string name="spoken_emoticon_3A_2D_21_20">Bored face</string> + <!-- Spoken description for emoticons ":-$". --> + <string name="spoken_emoticon_3A_2D_24_20">Embarrassed face</string> + <!-- Spoken description for emoticons "B-)". --> + <string name="spoken_emoticon_42_2D_29_20">Face wearing sunglasses</string> + <!-- Spoken description for emoticons ":O". --> + <string name="spoken_emoticon_3A_4F_20">Surprised face</string> + <!-- Spoken description for emoticons ":-*". --> + <string name="spoken_emoticon_3A_2D_2A_20">Kissing face</string> + <!-- Spoken description for emoticons ":-[". --> + <string name="spoken_emoticon_3A_2D_5B_20">Frowning face</string> <!-- Spoken descriptions when opening a more keys keyboard that has alternative characters. --> <string name="spoken_open_more_keys_keyboard">Alternative characters are available</string> diff --git a/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java b/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java index 7a3510ee1..edcdd4c4c 100644 --- a/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java +++ b/java/src/com/android/inputmethod/accessibility/KeyCodeDescriptionMapper.java @@ -37,6 +37,8 @@ final class KeyCodeDescriptionMapper { private static final String SPOKEN_LETTER_RESOURCE_NAME_FORMAT = "spoken_accented_letter_%04X"; private static final String SPOKEN_SYMBOL_RESOURCE_NAME_FORMAT = "spoken_symbol_%04X"; private static final String SPOKEN_EMOJI_RESOURCE_NAME_FORMAT = "spoken_emoji_%04X"; + private static final String SPOKEN_EMOTICON_RESOURCE_NAME_PREFIX = "spoken_emoticon"; + private static final String SPOKEN_EMOTICON_CODE_POINT_FORMAT = "_%02X"; // The resource ID of the string spoken for obscured keys private static final int OBSCURED_KEY_RES_ID = R.string.spoken_description_dot; @@ -109,7 +111,9 @@ final class KeyCodeDescriptionMapper { } if (code == Constants.CODE_OUTPUT_TEXT) { - return key.getOutputText(); + final String outputText = key.getOutputText(); + final String description = getSpokenEmoticonDescription(context, outputText); + return TextUtils.isEmpty(description) ? outputText : description; } // Just attempt to speak the description. @@ -340,4 +344,21 @@ final class KeyCodeDescriptionMapper { } return resId; } + + // TODO: Remove this method once TTS supports emoticon verbalization. + private String getSpokenEmoticonDescription(final Context context, final String outputText) { + final StringBuilder sb = new StringBuilder(SPOKEN_EMOTICON_RESOURCE_NAME_PREFIX); + final int textLength = outputText.length(); + for (int index = 0; index < textLength; index = outputText.offsetByCodePoints(index, 1)) { + final int codePoint = outputText.codePointAt(index); + sb.append(String.format(Locale.ROOT, SPOKEN_EMOTICON_CODE_POINT_FORMAT, codePoint)); + } + final String resourceName = sb.toString(); + final Resources resources = context.getResources(); + // Note that the resource package name may differ from the context package name. + final String resourcePackageName = resources.getResourcePackageName( + R.string.spoken_description_unknown); + final int resId = resources.getIdentifier(resourceName, "string", resourcePackageName); + return (resId == 0) ? null : resources.getString(resId); + } } diff --git a/java/src/com/android/inputmethod/compat/CursorAnchorInfoCompatWrapper.java b/java/src/com/android/inputmethod/compat/CursorAnchorInfoCompatWrapper.java index c937eeeaa..5af31795c 100644 --- a/java/src/com/android/inputmethod/compat/CursorAnchorInfoCompatWrapper.java +++ b/java/src/com/android/inputmethod/compat/CursorAnchorInfoCompatWrapper.java @@ -84,8 +84,8 @@ public final class CursorAnchorInfoCompatWrapper { } @UsedForTesting - public static boolean isAvailable() { - return sCursorAnchorInfoClass.exists(); + public boolean isAvailable() { + return sCursorAnchorInfoClass.exists() && mInstance != null; } private Object mInstance; @@ -96,7 +96,7 @@ public final class CursorAnchorInfoCompatWrapper { @UsedForTesting public static CursorAnchorInfoCompatWrapper fromObject(final Object instance) { - if (!isAvailable()) { + if (!sCursorAnchorInfoClass.exists()) { return new CursorAnchorInfoCompatWrapper(null); } return new CursorAnchorInfoCompatWrapper(instance); diff --git a/java/src/com/android/inputmethod/keyboard/TextDecorator.java b/java/src/com/android/inputmethod/keyboard/TextDecorator.java index 6e4e3281e..c22717f95 100644 --- a/java/src/com/android/inputmethod/keyboard/TextDecorator.java +++ b/java/src/com/android/inputmethod/keyboard/TextDecorator.java @@ -182,7 +182,7 @@ public class TextDecorator { private void layoutMain() { final CursorAnchorInfoCompatWrapper info = mCursorAnchorInfoWrapper; - if (info == null) { + if (info == null || !info.isAvailable()) { cancelLayoutInternalExpectedly("CursorAnchorInfo isn't available."); return; } diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardParams.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardParams.java index 1e1188bd0..71ce768a9 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardParams.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardParams.java @@ -27,6 +27,9 @@ import java.util.Comparator; import java.util.SortedSet; import java.util.TreeSet; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + public class KeyboardParams { public KeyboardId mId; public int mThemeId; @@ -67,7 +70,7 @@ public class KeyboardParams { public final KeyboardTextsSet mTextsSet = new KeyboardTextsSet(); public final KeyStylesSet mKeyStyles = new KeyStylesSet(mTextsSet); - public KeysCache mKeysCache; + @Nullable public KeysCache mKeysCache; public boolean mAllowRedundantMoreKeys; public int mMostCommonKeyHeight = 0; @@ -96,7 +99,7 @@ public class KeyboardParams { clearHistogram(); } - public void onAddKey(final Key newKey) { + public void onAddKey(@Nonnull final Key newKey) { final Key key = (mKeysCache != null) ? mKeysCache.get(newKey) : newKey; final boolean isSpacer = key.isSpacer(); if (isSpacer && key.getWidth() == 0) { @@ -129,7 +132,10 @@ public class KeyboardParams { mSortedKeys.clear(); for (final Key key : allKeys) { final Key filteredKey = Key.removeRedundantMoreKeys(key, lettersOnBaseLayout); - mSortedKeys.add(mKeysCache.replace(key, filteredKey)); + if (mKeysCache != null) { + mKeysCache.replace(key, filteredKey); + } + mSortedKeys.add(filteredKey); } } diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index b164c1779..1da33ed3f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -255,7 +255,7 @@ public final class BinaryDictionary extends Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel) { @@ -264,7 +264,7 @@ public final class BinaryDictionary extends Dictionary { } final DicTraverseSession session = getTraverseSession(sessionId); Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); - prevWordsInfo.outputToArray(session.mPrevWordCodePointArrays, + ngramContext.outputToArray(session.mPrevWordCodePointArrays, session.mIsBeginningOfSentenceArray); final InputPointers inputPointers = composer.getInputPointers(); final boolean isGesture = composer.isBatchMode(); @@ -299,7 +299,7 @@ public final class BinaryDictionary extends Dictionary { inputPointers.getYCoordinates(), inputPointers.getTimes(), inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, - session.mIsBeginningOfSentenceArray, prevWordsInfo.getPrevWordCount(), + session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(), session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices, session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence, @@ -357,17 +357,17 @@ public final class BinaryDictionary extends Dictionary { } @UsedForTesting - public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) { - return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY; + public boolean isValidNgram(final NgramContext ngramContext, final String word) { + return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY; } - public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + public int getNgramProbability(final NgramContext ngramContext, final String word) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return NOT_A_PROBABILITY; } - final int[][] prevWordCodePointArrays = new int[prevWordsInfo.getPrevWordCount()][]; - final boolean[] isBeginningOfSentenceArray = new boolean[prevWordsInfo.getPrevWordCount()]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints); @@ -456,14 +456,14 @@ public final class BinaryDictionary extends Dictionary { } // Add an n-gram entry to the binary dictionary with timestamp in native code. - public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, + public boolean addNgramEntry(final NgramContext ngramContext, final String word, final int probability, final int timestamp) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return false; } - final int[][] prevWordCodePointArrays = new int[prevWordsInfo.getPrevWordCount()][]; - final boolean[] isBeginningOfSentenceArray = new boolean[prevWordsInfo.getPrevWordCount()]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { @@ -474,13 +474,13 @@ public final class BinaryDictionary extends Dictionary { } // Remove an n-gram entry from the binary dictionary in native code. - public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { - if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { + public boolean removeNgramEntry(final NgramContext ngramContext, final String word) { + if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { return false; } - final int[][] prevWordCodePointArrays = new int[prevWordsInfo.getPrevWordCount()][]; - final boolean[] isBeginningOfSentenceArray = new boolean[prevWordsInfo.getPrevWordCount()]; - prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); + final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; + final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; + ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); final int[] wordCodePoints = StringUtils.toCodePointArray(word); if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays, isBeginningOfSentenceArray, wordCodePoints)) { diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index 162a209e3..78c6cbd24 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -218,7 +218,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { */ private void addNameLocked(final String name) { int len = StringUtils.codePointCount(name); - PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; // TODO: Better tokenization for non-Latin writing systems for (int i = 0; i < len; i++) { if (Character.isLetter(name.codePointAt(i))) { @@ -233,19 +233,19 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { final int wordLen = StringUtils.codePointCount(word); if (wordLen <= MAX_WORD_LENGTH && wordLen > 1) { if (DEBUG) { - Log.d(TAG, "addName " + name + ", " + word + ", " + prevWordsInfo); + Log.d(TAG, "addName " + name + ", " + word + ", " + ngramContext); } runGCIfRequiredLocked(true /* mindsBlockByGC */); addUnigramLocked(word, FREQUENCY_FOR_CONTACTS, null /* shortcut */, 0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - if (!prevWordsInfo.isValid() && mUseFirstLastBigrams) { + if (!ngramContext.isValid() && mUseFirstLastBigrams) { runGCIfRequiredLocked(true /* mindsBlockByGC */); - addNgramEntryLocked(prevWordsInfo, word, FREQUENCY_FOR_CONTACTS_BIGRAM, + addNgramEntryLocked(ngramContext, word, FREQUENCY_FOR_CONTACTS_BIGRAM, BinaryDictionary.NOT_A_VALID_TIMESTAMP); } - prevWordsInfo = prevWordsInfo.getNextPrevWordsInfo( - new PrevWordsInfo.WordInfo(word)); + ngramContext = ngramContext.getNextNgramContext( + new NgramContext.WordInfo(word)); } } } diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index b58a52b41..43561ba4b 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -81,10 +81,9 @@ public abstract class Dictionary { } /** - * Searches for suggestions for a given context. For the moment the context is only the - * previous word. + * Searches for suggestions for a given context. * @param composer the key sequence to match with coordinate info, as a WordComposer - * @param prevWordsInfo the information of previous words. + * @param ngramContext the context for n-gram. * @param proximityInfo the object for key proximity. May be ignored by some implementations. * @param settingsValuesForSuggestion the settings values used for the suggestion. * @param sessionId the session id. @@ -96,7 +95,7 @@ public abstract class Dictionary { * @return the list of suggestions (possibly null if none) */ abstract public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel); @@ -191,7 +190,7 @@ public abstract class Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel) { diff --git a/java/src/com/android/inputmethod/latin/DictionaryCollection.java b/java/src/com/android/inputmethod/latin/DictionaryCollection.java index b26b37817..a6d7205e2 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryCollection.java +++ b/java/src/com/android/inputmethod/latin/DictionaryCollection.java @@ -60,7 +60,7 @@ public final class DictionaryCollection extends Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel) { @@ -69,13 +69,13 @@ public final class DictionaryCollection extends Dictionary { // To avoid creating unnecessary objects, we get the list out of the first // dictionary and add the rest to it if not null, hence the get(0) ArrayList<SuggestedWordInfo> suggestions = dictionaries.get(0).getSuggestions(composer, - prevWordsInfo, proximityInfo, settingsValuesForSuggestion, sessionId, + ngramContext, proximityInfo, settingsValuesForSuggestion, sessionId, weightForLocale, inOutWeightOfLangModelVsSpatialModel); if (null == suggestions) suggestions = new ArrayList<>(); final int length = dictionaries.size(); for (int i = 1; i < length; ++ i) { final ArrayList<SuggestedWordInfo> sugg = dictionaries.get(i).getSuggestions(composer, - prevWordsInfo, proximityInfo, settingsValuesForSuggestion, sessionId, + ngramContext, proximityInfo, settingsValuesForSuggestion, sessionId, weightForLocale, inOutWeightOfLangModelVsSpatialModel); if (null != sugg) suggestions.addAll(sugg); } diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java index aa15bd6bf..6dc1e8273 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java @@ -25,7 +25,7 @@ import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.personalization.ContextualDictionary; import com.android.inputmethod.latin.personalization.PersonalizationDataChunk; @@ -509,23 +509,23 @@ public class DictionaryFacilitator { } public void addToUserHistory(final String suggestion, final boolean wasAutoCapitalized, - final PrevWordsInfo prevWordsInfo, final int timeStampInSeconds, + final NgramContext ngramContext, final int timeStampInSeconds, final boolean blockPotentiallyOffensive) { final DictionaryGroup dictionaryGroup = getDictionaryGroupForActiveLanguage(); final String[] words = suggestion.split(Constants.WORD_SEPARATOR); - PrevWordsInfo prevWordsInfoForCurrentWord = prevWordsInfo; + NgramContext ngramContextForCurrentWord = ngramContext; for (int i = 0; i < words.length; i++) { final String currentWord = words[i]; final boolean wasCurrentWordAutoCapitalized = (i == 0) ? wasAutoCapitalized : false; - addWordToUserHistory(dictionaryGroup, prevWordsInfoForCurrentWord, currentWord, + addWordToUserHistory(dictionaryGroup, ngramContextForCurrentWord, currentWord, wasCurrentWordAutoCapitalized, timeStampInSeconds, blockPotentiallyOffensive); - prevWordsInfoForCurrentWord = - prevWordsInfoForCurrentWord.getNextPrevWordsInfo(new WordInfo(currentWord)); + ngramContextForCurrentWord = + ngramContextForCurrentWord.getNextNgramContext(new WordInfo(currentWord)); } } private void addWordToUserHistory(final DictionaryGroup dictionaryGroup, - final PrevWordsInfo prevWordsInfo, final String word, final boolean wasAutoCapitalized, + final NgramContext ngramContext, final String word, final boolean wasAutoCapitalized, final int timeStampInSeconds, final boolean blockPotentiallyOffensive) { final ExpandableBinaryDictionary userHistoryDictionary = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY); @@ -571,7 +571,7 @@ public class DictionaryFacilitator { // We demote unrecognized words (frequency < 0, below) by specifying them as "invalid". // We don't add words with 0-frequency (assuming they would be profanity etc.). final boolean isValid = maxFreq > 0; - UserHistoryDictionary.addToDictionary(userHistoryDictionary, prevWordsInfo, secondWord, + UserHistoryDictionary.addToDictionary(userHistoryDictionary, ngramContext, secondWord, isValid, timeStampInSeconds, new DistracterFilterCheckingIsInDictionary( mDistracterFilter, userHistoryDictionary)); @@ -593,12 +593,11 @@ public class DictionaryFacilitator { // TODO: Revise the way to fusion suggestion results. public SuggestionResults getSuggestionResults(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId) { final DictionaryGroup[] dictionaryGroups = mDictionaryGroups; final SuggestionResults suggestionResults = new SuggestionResults( - SuggestedWords.MAX_SUGGESTIONS, - prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence); + SuggestedWords.MAX_SUGGESTIONS, ngramContext.isBeginningOfSentenceContext()); final float[] weightOfLangModelVsSpatialModel = new float[] { Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL }; for (final DictionaryGroup dictionaryGroup : dictionaryGroups) { @@ -606,7 +605,7 @@ public class DictionaryFacilitator { final Dictionary dictionary = dictionaryGroup.getDict(dictType); if (null == dictionary) continue; final ArrayList<SuggestedWordInfo> dictionarySuggestions = - dictionary.getSuggestions(composer, prevWordsInfo, proximityInfo, + dictionary.getSuggestions(composer, ngramContext, proximityInfo, settingsValuesForSuggestion, sessionId, dictionaryGroup.mWeightForLocale, weightOfLangModelVsSpatialModel); if (null == dictionarySuggestions) continue; @@ -720,7 +719,7 @@ public class DictionaryFacilitator { if (contextualDict == null) { return; } - PrevWordsInfo prevWordsInfo = PrevWordsInfo.BEGINNING_OF_SENTENCE; + NgramContext ngramContext = NgramContext.BEGINNING_OF_SENTENCE; for (int i = 0; i < phrase.length; i++) { final String[] subPhrase = Arrays.copyOfRange(phrase, i /* start */, phrase.length); final String subPhraseStr = TextUtils.join(Constants.WORD_SEPARATOR, subPhrase); @@ -730,7 +729,7 @@ public class DictionaryFacilitator { false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP, DistracterFilter.EMPTY_DISTRACTER_FILTER); - contextualDict.addNgramEntry(prevWordsInfo, subPhraseStr, + contextualDict.addNgramEntry(ngramContext, subPhraseStr, bigramProbabilityForPhrases, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (i < phrase.length - 1) { @@ -740,11 +739,11 @@ public class DictionaryFacilitator { false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP, DistracterFilter.EMPTY_DISTRACTER_FILTER); - contextualDict.addNgramEntry(prevWordsInfo, phrase[i], + contextualDict.addNgramEntry(ngramContext, phrase[i], bigramProbabilityForWords, BinaryDictionary.NOT_A_VALID_TIMESTAMP); } - prevWordsInfo = - prevWordsInfo.getNextPrevWordsInfo(new PrevWordsInfo.WordInfo(phrase[i])); + ngramContext = + ngramContext.getNextNgramContext(new NgramContext.WordInfo(phrase[i])); } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 68f2b62f0..1bdadc30b 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -305,7 +305,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { @Override public Boolean call() throws Exception { return !distracterFilter.isDistracterToWordsInDictionaries( - PrevWordsInfo.EMPTY_PREV_WORDS_INFO, word, mLocale); + NgramContext.EMPTY_PREV_WORDS_INFO, word, mLocale); } }, new Runnable() { @@ -354,7 +354,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { /** * Adds n-gram information of a word to the dictionary. May overwrite an existing entry. */ - public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, + public void addNgramEntry(final NgramContext ngramContext, final String word, final int frequency, final int timestamp) { reloadDictionaryIfRequired(); asyncExecuteTaskWithWriteLock(new Runnable() { @@ -364,17 +364,17 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { return; } runGCIfRequiredLocked(true /* mindsBlockByGC */); - addNgramEntryLocked(prevWordsInfo, word, frequency, timestamp); + addNgramEntryLocked(ngramContext, word, frequency, timestamp); } }); } - protected void addNgramEntryLocked(final PrevWordsInfo prevWordsInfo, final String word, + protected void addNgramEntryLocked(final NgramContext ngramContext, final String word, final int frequency, final int timestamp) { - if (!mBinaryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp)) { + if (!mBinaryDictionary.addNgramEntry(ngramContext, word, frequency, timestamp)) { if (DEBUG) { Log.i(TAG, "Cannot add n-gram entry."); - Log.i(TAG, " PrevWordsInfo: " + prevWordsInfo + ", word: " + word); + Log.i(TAG, " NgramContext: " + ngramContext + ", word: " + word); } } } @@ -383,7 +383,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * Dynamically remove the n-gram entry in the dictionary. */ @UsedForTesting - public void removeNgramDynamically(final PrevWordsInfo prevWordsInfo, final String word) { + public void removeNgramDynamically(final NgramContext ngramContext, final String word) { reloadDictionaryIfRequired(); asyncExecuteTaskWithWriteLock(new Runnable() { @Override @@ -392,10 +392,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { return; } runGCIfRequiredLocked(true /* mindsBlockByGC */); - if (!mBinaryDictionary.removeNgramEntry(prevWordsInfo, word)) { + if (!mBinaryDictionary.removeNgramEntry(ngramContext, word)) { if (DEBUG) { Log.i(TAG, "Cannot remove n-gram entry."); - Log.i(TAG, " PrevWordsInfo: " + prevWordsInfo + ", word: " + word); + Log.i(TAG, " NgramContext: " + ngramContext + ", word: " + word); } } } @@ -434,7 +434,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel) { reloadDictionaryIfRequired(); @@ -447,7 +447,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { return null; } final ArrayList<SuggestedWordInfo> suggestions = - mBinaryDictionary.getSuggestions(composer, prevWordsInfo, proximityInfo, + mBinaryDictionary.getSuggestions(composer, ngramContext, proximityInfo, settingsValuesForSuggestion, sessionId, weightForLocale, inOutWeightOfLangModelVsSpatialModel); if (mBinaryDictionary.isCorrupted()) { @@ -519,9 +519,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } - protected boolean isValidNgramLocked(final PrevWordsInfo prevWordsInfo, final String word) { + protected boolean isValidNgramLocked(final NgramContext ngramContext, final String word) { if (mBinaryDictionary == null) return false; - return mBinaryDictionary.isValidNgram(prevWordsInfo, word); + return mBinaryDictionary.isValidNgram(ngramContext, word); } /** diff --git a/java/src/com/android/inputmethod/latin/LastComposedWord.java b/java/src/com/android/inputmethod/latin/LastComposedWord.java index 8cbf8379b..f3f736fbc 100644 --- a/java/src/com/android/inputmethod/latin/LastComposedWord.java +++ b/java/src/com/android/inputmethod/latin/LastComposedWord.java @@ -48,7 +48,7 @@ public final class LastComposedWord { public final String mTypedWord; public final CharSequence mCommittedWord; public final String mSeparatorString; - public final PrevWordsInfo mPrevWordsInfo; + public final NgramContext mNgramContext; public final int mCapitalizedMode; public final InputPointers mInputPointers = new InputPointers(Constants.DICTIONARY_MAX_WORD_LENGTH); @@ -64,7 +64,7 @@ public final class LastComposedWord { public LastComposedWord(final ArrayList<Event> events, final InputPointers inputPointers, final String typedWord, final CharSequence committedWord, final String separatorString, - final PrevWordsInfo prevWordsInfo, final int capitalizedMode) { + final NgramContext ngramContext, final int capitalizedMode) { if (inputPointers != null) { mInputPointers.copy(inputPointers); } @@ -73,7 +73,7 @@ public final class LastComposedWord { mCommittedWord = committedWord; mSeparatorString = separatorString; mActive = true; - mPrevWordsInfo = prevWordsInfo; + mNgramContext = ngramContext; mCapitalizedMode = capitalizedMode; } diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/NgramContext.java index 1b7e8f96d..6d438584f 100644 --- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java +++ b/java/src/com/android/inputmethod/latin/NgramContext.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin; import android.text.TextUtils; +import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.utils.StringUtils; import java.util.Arrays; @@ -26,11 +27,11 @@ import java.util.Arrays; * Class to represent information of previous words. This class is used to add n-gram entries * into binary dictionaries, to get predictions, and to get suggestions. */ -public class PrevWordsInfo { - public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO = - new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO); - public static final PrevWordsInfo BEGINNING_OF_SENTENCE = - new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE); +public class NgramContext { + public static final NgramContext EMPTY_PREV_WORDS_INFO = + new NgramContext(WordInfo.EMPTY_WORD_INFO); + public static final NgramContext BEGINNING_OF_SENTENCE = + new NgramContext(WordInfo.BEGINNING_OF_SENTENCE); /** * Word information used to represent previous words information. @@ -86,35 +87,66 @@ public class PrevWordsInfo { // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the // WordComposer was reset and before starting a new composing word, but we should never be // calling getSuggetions* in this situation. - public final WordInfo[] mPrevWordsInfo; + private final WordInfo[] mPrevWordsInfo; + private final int mPrevWordsCount; // Construct from the previous word information. - public PrevWordsInfo(final WordInfo prevWordInfo) { - mPrevWordsInfo = new WordInfo[] { prevWordInfo }; + public NgramContext(final WordInfo... prevWordsInfo) { + mPrevWordsInfo = prevWordsInfo; + mPrevWordsCount = prevWordsInfo.length; } - // Construct from WordInfo array. n-th element represents (n+1)-th previous word's information. - public PrevWordsInfo(final WordInfo[] prevWordsInfo) { - mPrevWordsInfo = prevWordsInfo; + // Construct from WordInfo array and size. The caller shouldn't change prevWordsInfo after + // calling this method. + private NgramContext(final NgramContext ngramContext, final int prevWordsCount) { + if (ngramContext.mPrevWordsCount < prevWordsCount) { + throw new IndexOutOfBoundsException("ngramContext.mPrevWordsCount (" + + ngramContext.mPrevWordsCount + ") is smaller than prevWordsCount (" + + prevWordsCount + ")"); + } + mPrevWordsInfo = ngramContext.mPrevWordsInfo; + mPrevWordsCount = prevWordsCount; } // Create next prevWordsInfo using current prevWordsInfo. - public PrevWordsInfo getNextPrevWordsInfo(final WordInfo wordInfo) { + public NgramContext getNextNgramContext(final WordInfo wordInfo) { final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, - mPrevWordsInfo.length + 1); + mPrevWordsCount + 1); final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; prevWordsInfo[0] = wordInfo; - System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, prevWordsInfo.length - 1); - return new PrevWordsInfo(prevWordsInfo); + System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); + return new NgramContext(prevWordsInfo); } public boolean isValid() { - return mPrevWordsInfo.length > 0 && mPrevWordsInfo[0].isValid(); + return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); + } + + public boolean isBeginningOfSentenceContext() { + return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; + } + + // n is 1-indexed. + // TODO: Remove + public CharSequence getNthPrevWord(final int n) { + if (n <= 0 || n > mPrevWordsCount) { + return null; + } + return mPrevWordsInfo[n - 1].mWord; + } + + // n is 1-indexed. + @UsedForTesting + public boolean isNthPrevWordBeginningOfSontence(final int n) { + if (n <= 0 || n > mPrevWordsCount) { + return false; + } + return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; } public void outputToArray(final int[][] codePointArrays, final boolean[] isBeginningOfSentenceArray) { - for (int i = 0; i < mPrevWordsInfo.length; i++) { + for (int i = 0; i < mPrevWordsCount; i++) { final WordInfo wordInfo = mPrevWordsInfo[i]; if (wordInfo == null || !wordInfo.isValid()) { codePointArrays[i] = new int[0]; @@ -126,39 +158,49 @@ public class PrevWordsInfo { } } - public PrevWordsInfo getTrimmedPrevWordsInfo(final int maxPrevWordCount) { - final int newSize = Math.min(maxPrevWordCount, mPrevWordsInfo.length); - // TODO: Quit creating a new array. - final WordInfo[] prevWordsInfo = Arrays.copyOf(mPrevWordsInfo, newSize); - return new PrevWordsInfo(prevWordsInfo); + public NgramContext getTrimmedNgramContext(final int maxPrevWordCount) { + final int newSize = Math.min(maxPrevWordCount, mPrevWordsCount); + return new NgramContext(this /* prevWordsInfo */, newSize); } public int getPrevWordCount() { - return mPrevWordsInfo.length; + return mPrevWordsCount; } @Override public int hashCode() { - // Just for having equals(). - return mPrevWordsInfo[0].hashCode(); + int hashValue = 0; + for (final WordInfo wordInfo : mPrevWordsInfo) { + if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { + break; + } + hashValue ^= wordInfo.hashCode(); + } + return hashValue; } @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof PrevWordsInfo)) return false; - final PrevWordsInfo prevWordsInfo = (PrevWordsInfo)o; + if (!(o instanceof NgramContext)) return false; + final NgramContext prevWordsInfo = (NgramContext)o; - final int minLength = Math.min(mPrevWordsInfo.length, prevWordsInfo.mPrevWordsInfo.length); + final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); for (int i = 0; i < minLength; i++) { if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { return false; } } - final WordInfo[] longerWordsInfo = - (mPrevWordsInfo.length > prevWordsInfo.mPrevWordsInfo.length) ? - mPrevWordsInfo : prevWordsInfo.mPrevWordsInfo; - for (int i = minLength; i < longerWordsInfo.length; i++) { + final WordInfo[] longerWordsInfo; + final int longerWordsInfoCount; + if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { + longerWordsInfo = mPrevWordsInfo; + longerWordsInfoCount = mPrevWordsCount; + } else { + longerWordsInfo = prevWordsInfo.mPrevWordsInfo; + longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; + } + for (int i = minLength; i < longerWordsInfoCount; i++) { if (longerWordsInfo[i] != null && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { return false; @@ -170,7 +212,7 @@ public class PrevWordsInfo { @Override public String toString() { final StringBuffer builder = new StringBuffer(); - for (int i = 0; i < mPrevWordsInfo.length; i++) { + for (int i = 0; i < mPrevWordsCount; i++) { final WordInfo wordInfo = mPrevWordsInfo[i]; builder.append("PrevWord["); builder.append(i); diff --git a/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java index 827367bb4..bc8bd831c 100644 --- a/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java @@ -51,13 +51,13 @@ public final class ReadOnlyBinaryDictionary extends Dictionary { @Override public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel) { if (mLock.readLock().tryLock()) { try { - return mBinaryDictionary.getSuggestions(composer, prevWordsInfo, proximityInfo, + return mBinaryDictionary.getSuggestions(composer, ngramContext, proximityInfo, settingsValuesForSuggestion, sessionId, weightForLocale, inOutWeightOfLangModelVsSpatialModel); } finally { diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index d672430a1..750706113 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -36,7 +36,7 @@ import com.android.inputmethod.compat.InputConnectionCompatUtils; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.DebugLogUtils; -import com.android.inputmethod.latin.utils.PrevWordsInfoUtils; +import com.android.inputmethod.latin.utils.NgramContextUtils; import com.android.inputmethod.latin.utils.ScriptUtils; import com.android.inputmethod.latin.utils.SpannableStringUtils; import com.android.inputmethod.latin.utils.StringUtils; @@ -593,11 +593,11 @@ public final class RichInputConnection { } @SuppressWarnings("unused") - public PrevWordsInfo getPrevWordsInfoFromNthPreviousWord( + public NgramContext getNgramContextFromNthPreviousWord( final SpacingAndPunctuations spacingAndPunctuations, final int n) { mIC = mParent.getCurrentInputConnection(); if (null == mIC) { - return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + return NgramContext.EMPTY_PREV_WORDS_INFO; } final CharSequence prev = getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0); if (DEBUG_PREVIOUS_TEXT && null != prev) { @@ -618,7 +618,7 @@ public final class RichInputConnection { } } } - return PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( + return NgramContextUtils.getNgramContextFromNthPreviousWord( prev, spacingAndPunctuations, n); } diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 1ecc995b2..d2d9b9b1e 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -68,15 +68,15 @@ public final class Suggest { } public void getSuggestedWords(final WordComposer wordComposer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber, final OnGetSuggestedWordsCallback callback) { if (wordComposer.isBatchMode()) { - getSuggestedWordsForBatchInput(wordComposer, prevWordsInfo, proximityInfo, + getSuggestedWordsForBatchInput(wordComposer, ngramContext, proximityInfo, settingsValuesForSuggestion, inputStyle, sequenceNumber, callback); } else { - getSuggestedWordsForNonBatchInput(wordComposer, prevWordsInfo, proximityInfo, + getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, proximityInfo, settingsValuesForSuggestion, inputStyle, isCorrectionEnabled, sequenceNumber, callback); } @@ -121,7 +121,7 @@ public final class Suggest { // Retrieves suggestions for non-batch input (typing, recorrection, predictions...) // and calls the callback function with the suggestions. private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled, final int sequenceNumber, final OnGetSuggestedWordsCallback callback) { @@ -132,7 +132,7 @@ public final class Suggest { : typedWord; final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults( - wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion, + wordComposer, ngramContext, proximityInfo, settingsValuesForSuggestion, SESSION_ID_TYPING); final ArrayList<SuggestedWordInfo> suggestionsContainer = getTransformedSuggestedWordInfoList(wordComposer, suggestionResults, @@ -209,12 +209,12 @@ public final class Suggest { // Retrieves suggestions for the batch input // and calls the callback function with the suggestions. private void getSuggestedWordsForBatchInput(final WordComposer wordComposer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, + final NgramContext ngramContext, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int inputStyle, final int sequenceNumber, final OnGetSuggestedWordsCallback callback) { final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults( - wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion, + wordComposer, ngramContext, proximityInfo, settingsValuesForSuggestion, SESSION_ID_GESTURE); final Locale defaultLocale = mDictionaryFacilitator.getLocale(); final ArrayList<SuggestedWordInfo> suggestionsContainer = diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index 567aa07f1..f85b34b5e 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -448,13 +448,13 @@ public final class WordComposer { // `type' should be one of the LastComposedWord.COMMIT_TYPE_* constants above. // committedWord should contain suggestion spans if applicable. public LastComposedWord commitWord(final int type, final CharSequence committedWord, - final String separatorString, final PrevWordsInfo prevWordsInfo) { + final String separatorString, final NgramContext ngramContext) { // Note: currently, we come here whenever we commit a word. If it's a MANUAL_PICK // or a DECIDED_WORD we may cancel the commit later; otherwise, we should deactivate // the last composed word to ensure this does not happen. final LastComposedWord lastComposedWord = new LastComposedWord(mEvents, mInputPointers, mTypedWordCache.toString(), committedWord, separatorString, - prevWordsInfo, mCapitalizedMode); + ngramContext, mCapitalizedMode); mInputPointers.reset(); if (type != LastComposedWord.COMMIT_TYPE_DECIDED_WORD && type != LastComposedWord.COMMIT_TYPE_MANUAL_PICK) { diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java index 46427e5ca..d7e1eba8b 100644 --- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java +++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java @@ -45,7 +45,7 @@ import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.InputPointers; import com.android.inputmethod.latin.LastComposedWord; import com.android.inputmethod.latin.LatinIME; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.RichInputConnection; import com.android.inputmethod.latin.Suggest; import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; @@ -1376,7 +1376,7 @@ public final class InputLogic { } private void performAdditionToUserHistoryDictionary(final SettingsValues settingsValues, - final String suggestion, final PrevWordsInfo prevWordsInfo) { + final String suggestion, final NgramContext ngramContext) { // If correction is not enabled, we don't add words to the user history dictionary. // That's to avoid unintended additions in some sensitive fields, or fields that // expect to receive non-words. @@ -1388,7 +1388,7 @@ public final class InputLogic { final int timeStampInSeconds = (int)TimeUnit.MILLISECONDS.toSeconds( System.currentTimeMillis()); mDictionaryFacilitator.addToUserHistory(suggestion, wasAutoCapitalized, - prevWordsInfo, timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive); + ngramContext, timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive); } public void performUpdateSuggestionStripSync(final SettingsValues settingsValues, @@ -1519,10 +1519,10 @@ public final class InputLogic { } } final int[] codePoints = StringUtils.toCodePointArray(typedWord); - // We want the previous word for suggestion. If we have chars in the word + // We want the context of preceding words for suggestion. If we have chars in the word // before the cursor, then we want the word before that, hence 2; otherwise, // we want the word immediately before the cursor, hence 1. - final PrevWordsInfo prevWordsInfo = getPrevWordsInfoFromNthPreviousWordForSuggestion( + final NgramContext ngramContext = getNgramContextFromNthPreviousWordForSuggestion( settingsValues.mSpacingAndPunctuations, 0 == numberOfCharsInWordBeforeCursor ? 1 : 2); mWordComposer.setComposingWord(codePoints, @@ -1591,6 +1591,10 @@ public final class InputLogic { final String committedWordString = committedWord.toString(); final int cancelLength = committedWord.length(); final String separatorString = mLastComposedWord.mSeparatorString; + // If our separator is a space, we won't actually commit it, + // but set the space state to PHANTOM so that a space will be inserted + // on the next keypress + final boolean usePhantomSpace = separatorString.equals(Constants.STRING_SPACE); // We want java chars, not codepoints for the following. final int separatorLength = separatorString.length(); // TODO: should we check our saved separator against the actual contents of the text view? @@ -1611,7 +1615,8 @@ public final class InputLogic { if (!TextUtils.isEmpty(committedWord)) { mDictionaryFacilitator.removeWordFromPersonalizedDicts(committedWordString); } - final String stringToCommit = originallyTypedWord + separatorString; + final String stringToCommit = originallyTypedWord + + (usePhantomSpace ? "" : separatorString); final SpannableString textToCommit = new SpannableString(stringToCommit); if (committedWord instanceof SpannableString) { final SpannableString committedWordWithSuggestionSpans = (SpannableString)committedWord; @@ -1663,6 +1668,9 @@ public final class InputLogic { } else { mConnection.commitText(textToCommit, 1); } + if (usePhantomSpace) { + mSpaceState = SpaceState.PHANTOM; + } } else { // For languages without spaces, we revert the typed string but the cursor is flush // with the typed word, so we need to resume suggestions right away. @@ -1760,24 +1768,24 @@ public final class InputLogic { } /** - * Get information fo previous words from the nth previous word before the cursor as context + * Get n-gram context from the nth previous word before the cursor as context * for the suggestion process. * @param spacingAndPunctuations the current spacing and punctuations settings. * @param nthPreviousWord reverse index of the word to get (1-indexed) * @return the information of previous words */ // TODO: Make this private - public PrevWordsInfo getPrevWordsInfoFromNthPreviousWordForSuggestion( + public NgramContext getNgramContextFromNthPreviousWordForSuggestion( final SpacingAndPunctuations spacingAndPunctuations, final int nthPreviousWord) { if (spacingAndPunctuations.mCurrentLanguageHasSpaces) { // If we are typing in a language with spaces we can just look up the previous // word information from textview. - return mConnection.getPrevWordsInfoFromNthPreviousWord( + return mConnection.getNgramContextFromNthPreviousWord( spacingAndPunctuations, nthPreviousWord); } else { return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ? - PrevWordsInfo.BEGINNING_OF_SENTENCE : - new PrevWordsInfo(new PrevWordsInfo.WordInfo( + NgramContext.BEGINNING_OF_SENTENCE : + new NgramContext(new NgramContext.WordInfo( mLastComposedWord.mCommittedWord.toString())); } } @@ -2140,20 +2148,20 @@ public final class InputLogic { final CharSequence chosenWordWithSuggestions = SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord, suggestedWords); - // When we are composing word, get previous words information from the 2nd previous word - // because the 1st previous word is the word to be committed. Otherwise get previous words - // information from the 1st previous word. - final PrevWordsInfo prevWordsInfo = mConnection.getPrevWordsInfoFromNthPreviousWord( + // When we are composing word, get n-gram context from the 2nd previous word because the + // 1st previous word is the word to be committed. Otherwise get n-gram context from the 1st + // previous word. + final NgramContext ngramContext = mConnection.getNgramContextFromNthPreviousWord( settingsValues.mSpacingAndPunctuations, mWordComposer.isComposingWord() ? 2 : 1); mConnection.commitText(chosenWordWithSuggestions, 1); // Add the word to the user history dictionary - performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWordsInfo); + performAdditionToUserHistoryDictionary(settingsValues, chosenWord, ngramContext); // TODO: figure out here if this is an auto-correct or if the best word is actually // what user typed. Note: currently this is done much later in // LastComposedWord#didCommitTypedWord by string equality of the remembered // strings. mLastComposedWord = mWordComposer.commitWord(commitType, - chosenWordWithSuggestions, separatorString, prevWordsInfo); + chosenWordWithSuggestions, separatorString, ngramContext); } /** @@ -2200,7 +2208,7 @@ public final class InputLogic { mWordComposer.adviseCapitalizedModeBeforeFetchingSuggestions( getActualCapsMode(settingsValues, keyboardShiftMode)); mSuggest.getSuggestedWords(mWordComposer, - getPrevWordsInfoFromNthPreviousWordForSuggestion( + getNgramContextFromNthPreviousWordForSuggestion( settingsValues.mSpacingAndPunctuations, // Get the word on which we should search the bigrams. If we are composing // a word, it's whatever is *before* the half-committed word in the buffer, diff --git a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java index df447fd75..3f9ffd285 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java @@ -47,6 +47,7 @@ public final class DictionaryHeader { public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; public static final String ATTRIBUTE_VALUE_TRUE = "1"; + public static final String CODE_POINT_TABLE_KEY = "codePointTable"; public DictionaryHeader(final int headerSize, final DictionaryOptions dictionaryOptions, final FormatOptions formatOptions) throws UnsupportedFormatException { diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index ec3c6e291..2661d5d48 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -237,6 +237,8 @@ public final class FormatSpec { static final int UINT16_MAX = 0xFFFF; static final int UINT24_MAX = 0xFFFFFF; static final int MSB8 = 0x80; + static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; /** * Options about file format. diff --git a/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java new file mode 100644 index 000000000..99e0e273f --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java @@ -0,0 +1,26 @@ +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.NgramContext; + +public class NgramProperty { + public final WeightedString mTargetWord; + public final NgramContext mNgramContext; + + public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) { + mTargetWord = targetWord; + mNgramContext = ngramContext; + } + + @Override + public int hashCode() { + return mTargetWord.hashCode() ^ mNgramContext.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if (!(o instanceof NgramProperty)) return false; + final NgramProperty n = (NgramProperty)o; + return mTargetWord.equals(n.mTargetWord) && mNgramContext.equals(n.mNgramContext); + } +} diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index cd78e2235..46705f9db 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.BinaryDictionary; +import com.android.inputmethod.latin.NgramContext; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.utils.CombinedFormatUtils; import com.android.inputmethod.latin.utils.StringUtils; @@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> { public final String mWord; public final ProbabilityInfo mProbabilityInfo; public final ArrayList<WeightedString> mShortcutTargets; - public final ArrayList<WeightedString> mBigrams; + public final ArrayList<NgramProperty> mNgrams; // TODO: Support mIsBeginningOfSentence. public final boolean mIsBeginningOfSentence; public final boolean mIsNotAWord; public final boolean mIsBlacklistEntry; public final boolean mHasShortcuts; - public final boolean mHasBigrams; + public final boolean mHasNgrams; private int mHashCode = 0; + // TODO: Support n-gram. @UsedForTesting public WordProperty(final String word, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> shortcutTargets, @@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> { mWord = word; mProbabilityInfo = probabilityInfo; mShortcutTargets = shortcutTargets; - mBigrams = bigrams; + mNgrams = new ArrayList<>(); + final NgramContext ngramContext = new NgramContext(new WordInfo(mWord)); + if (bigrams != null) { + for (final WeightedString bigramTarget : bigrams) { + mNgrams.add(new NgramProperty(bigramTarget, ngramContext)); + } + } mIsBeginningOfSentence = false; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklistEntry; - mHasBigrams = bigrams != null && !bigrams.isEmpty(); + mHasNgrams = bigrams != null && !bigrams.isEmpty(); mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); } @@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> { mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mShortcutTargets = new ArrayList<>(); - mBigrams = new ArrayList<>(); + mNgrams = new ArrayList<>(); mIsBeginningOfSentence = isBeginningOfSentence; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklisted; mHasShortcuts = hasShortcuts; - mHasBigrams = hasBigram; - - final int bigramTargetCount = bigramTargets.size(); - for (int i = 0; i < bigramTargetCount; i++) { - final String bigramTargetString = + mHasNgrams = hasBigram; + + final int relatedNgramCount = bigramTargets.size(); + final WordInfo currentWordInfo = + mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord); + final NgramContext ngramContext = new NgramContext(currentWordInfo); + for (int i = 0; i < relatedNgramCount; i++) { + final String ngramTargetString = StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i)); - mBigrams.add(new WeightedString(bigramTargetString, - createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)))); + final WeightedString ngramTarget = new WeightedString(ngramTargetString, + createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))); + // TODO: Support n-gram. + mNgrams.add(new NgramProperty(ngramTarget, ngramContext)); } final int shortcutTargetCount = shortcutTargets.size(); @@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> { } } + // TODO: Remove + public ArrayList<WeightedString> getBigrams() { + final ArrayList<WeightedString> bigrams = new ArrayList<>(); + for (final NgramProperty ngram : mNgrams) { + if (ngram.mNgramContext.getPrevWordCount() == 1) { + bigrams.add(ngram.mTargetWord); + } + } + return bigrams; + } + public int getProbability() { return mProbabilityInfo.mProbability; } @@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> { return Arrays.hashCode(new Object[] { word.mWord, word.mProbabilityInfo, - word.mShortcutTargets.hashCode(), - word.mBigrams.hashCode(), + word.mShortcutTargets, + word.mNgrams, word.mIsNotAWord, word.mIsBlacklistEntry }); @@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> { if (!(o instanceof WordProperty)) return false; WordProperty w = (WordProperty)o; return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord) - && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams) + && mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams) && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry - && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams; + && mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams; } @Override diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java index d1486f630..d61684698 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java @@ -23,7 +23,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.utils.DistracterFilter; import java.io.File; @@ -53,14 +53,14 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas * Add a word to the user history dictionary. * * @param userHistoryDictionary the user history dictionary - * @param prevWordsInfo the information of previous words + * @param ngramContext the n-gram context * @param word the word the user inputted * @param isValid whether the word is valid or not * @param timestamp the timestamp when the word has been inputted * @param distracterFilter the filter to check whether the word is a distracter */ public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary, - final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid, + final NgramContext ngramContext, final String word, final boolean isValid, final int timestamp, final DistracterFilter distracterFilter) { if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) { return; @@ -71,12 +71,11 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */, timestamp, distracterFilter); - final boolean isBeginningOfSentenceContext = - prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence; - final PrevWordsInfo prevWordsInfoToBeSaved = - prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1); - for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) { - final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord; + final boolean isBeginningOfSentenceContext = ngramContext.isBeginningOfSentenceContext(); + final NgramContext ngramContextToBeSaved = + ngramContext.getTrimmedNgramContext(SUPPORTED_NGRAM - 1); + for (int i = 0; i < ngramContextToBeSaved.getPrevWordCount(); i++) { + final CharSequence prevWord = ngramContextToBeSaved.getNthPrevWord(1 /* n */); if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) { return; } @@ -87,11 +86,11 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas if (isBeginningOfSentenceContext) { // Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word. userHistoryDictionary.addNgramEntry( - prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, + ngramContextToBeSaved.getTrimmedNgramContext(i + 1), word, FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp); } else { userHistoryDictionary.addNgramEntry( - prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency, + ngramContextToBeSaved.getTrimmedNgramContext(i + 1), word, frequency, timestamp); } } diff --git a/java/src/com/android/inputmethod/latin/settings/Settings.java b/java/src/com/android/inputmethod/latin/settings/Settings.java index a171fc330..83adb1c55 100644 --- a/java/src/com/android/inputmethod/latin/settings/Settings.java +++ b/java/src/com/android/inputmethod/latin/settings/Settings.java @@ -32,6 +32,7 @@ import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.utils.AdditionalSubtypeUtils; import com.android.inputmethod.latin.utils.ResourceUtils; import com.android.inputmethod.latin.utils.RunInLocale; +import com.android.inputmethod.latin.utils.StatsUtils; import com.android.inputmethod.latin.utils.StringUtils; import java.util.Collections; @@ -169,6 +170,7 @@ public final class Settings implements SharedPreferences.OnSharedPreferenceChang return; } loadSettings(mContext, mSettingsValues.mLocale, mSettingsValues.mInputAttributes); + StatsUtils.onLoadSettings(mSettingsValues); } finally { mSettingsValuesLock.unlock(); } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 352391611..2a4e14ca7 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -31,7 +31,7 @@ import com.android.inputmethod.keyboard.KeyboardLayoutSet; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.DictionaryFacilitatorLruCache; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.RichInputMethodSubtype; import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; @@ -163,14 +163,14 @@ public final class AndroidSpellCheckerService extends SpellCheckerService } public SuggestionResults getSuggestionResults(final Locale locale, final WordComposer composer, - final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo) { + final NgramContext ngramContext, final ProximityInfo proximityInfo) { Integer sessionId = null; mSemaphore.acquireUninterruptibly(); try { sessionId = mSessionIdPool.poll(); DictionaryFacilitator dictionaryFacilitatorForLocale = mDictionaryFacilitatorCache.get(locale); - return dictionaryFacilitatorForLocale.getSuggestionResults(composer, prevWordsInfo, + return dictionaryFacilitatorForLocale.getSuggestionResults(composer, ngramContext, proximityInfo, mSettingsValuesForSuggestion, sessionId); } finally { if (sessionId != null) { diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java index 34e01197a..8393b306c 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java @@ -25,7 +25,7 @@ import android.view.textservice.SuggestionsInfo; import android.view.textservice.TextInfo; import com.android.inputmethod.compat.TextInfoCompatUtils; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.utils.StringUtils; import java.util.ArrayList; @@ -62,8 +62,8 @@ public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheck final int offset = ssi.getOffsetAt(i); final int length = ssi.getLengthAt(i); final CharSequence subText = typedText.subSequence(offset, offset + length); - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new PrevWordsInfo.WordInfo(currentWord)); + final NgramContext ngramContext = + new NgramContext(new NgramContext.WordInfo(currentWord)); currentWord = subText; if (!subText.toString().contains(AndroidSpellCheckerService.SINGLE_QUOTE)) { continue; @@ -80,7 +80,7 @@ public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheck if (TextUtils.isEmpty(splitText)) { continue; } - if (mSuggestionsCache.getSuggestionsFromCache(splitText.toString(), prevWordsInfo) + if (mSuggestionsCache.getSuggestionsFromCache(splitText.toString(), ngramContext) == null) { continue; } @@ -208,10 +208,10 @@ public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheck } else { prevWord = null; } - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new PrevWordsInfo.WordInfo(prevWord)); + final NgramContext ngramContext = + new NgramContext(new NgramContext.WordInfo(prevWord)); final TextInfo textInfo = textInfos[i]; - retval[i] = onGetSuggestionsInternal(textInfo, prevWordsInfo, suggestionsLimit); + retval[i] = onGetSuggestionsInternal(textInfo, ngramContext, suggestionsLimit); retval[i].setCookieAndSequence(textInfo.getCookie(), textInfo.getSequence()); } return retval; diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java index d668672aa..7b6aacd15 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java @@ -31,7 +31,7 @@ import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.WordComposer; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; @@ -73,27 +73,25 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = new LruCache<>(MAX_CACHE_SIZE); - // TODO: Support n-gram input - private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) { - if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) { + private static String generateKey(final String query, final NgramContext ngramContext) { + if (TextUtils.isEmpty(query) || !ngramContext.isValid()) { return query; } - return query + CHAR_DELIMITER + prevWordsInfo; + return query + CHAR_DELIMITER + ngramContext; } public SuggestionsParams getSuggestionsFromCache(String query, - final PrevWordsInfo prevWordsInfo) { - return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo)); + final NgramContext ngramContext) { + return mUnigramSuggestionsInfoCache.get(generateKey(query, ngramContext)); } - public void putSuggestionsToCache( - final String query, final PrevWordsInfo prevWordsInfo, + public void putSuggestionsToCache(final String query, final NgramContext ngramContext, final String[] suggestions, final int flags) { if (suggestions == null || TextUtils.isEmpty(query)) { return; } mUnigramSuggestionsInfoCache.put( - generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags)); + generateKey(query, ngramContext), new SuggestionsParams(suggestions, flags)); } public void clearCache() { @@ -223,12 +221,11 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { } protected SuggestionsInfo onGetSuggestionsInternal( - final TextInfo textInfo, final PrevWordsInfo prevWordsInfo, - final int suggestionsLimit) { + final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) { try { final String inText = textInfo.getText(); final SuggestionsParams cachedSuggestionsParams = - mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo); + mSuggestionsCache.getSuggestionsFromCache(inText, ngramContext); if (cachedSuggestionsParams != null) { if (DBG) { Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); @@ -283,7 +280,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { composer.setComposingWord(codePoints, coordinates); // TODO: Don't gather suggestions if the limit is <= 0 unless necessary final SuggestionResults suggestionResults = mService.getSuggestionResults( - mLocale, composer, prevWordsInfo, proximityInfo); + mLocale, composer, ngramContext, proximityInfo); final Result result = getResult(capitalizeType, mLocale, suggestionsLimit, mService.getRecommendedThreshold(), text, suggestionResults); isInDict = isInDictForAnyCapitalization(text, capitalizeType); @@ -308,7 +305,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() : 0); final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); - mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions, + mSuggestionsCache.putSuggestionsToCache(text, ngramContext, result.mSuggestions, flags); return retval; } catch (RuntimeException e) { diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java index 1e8df8986..839fce051 100644 --- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java +++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java @@ -365,17 +365,21 @@ final class SuggestionStripLayoutHelper { (PunctuationSuggestions)suggestedWords, stripView); } + final boolean shouldShowUiToAcceptTypedWord = Settings.getInstance().getCurrent() + .mShouldShowUiToAcceptTypedWord; + final int suggestionsCount = suggestedWords.size() + - (shouldShowUiToAcceptTypedWord ? /* typed word */ 1 : 0); final int startIndexOfMoreSuggestions = setupWordViewsAndReturnStartIndexOfMoreSuggestions( suggestedWords, mSuggestionsCountInStrip); final TextView centerWordView = mWordViews.get(mCenterPositionInStrip); final int stripWidth = stripView.getWidth(); final int centerWidth = getSuggestionWidth(mCenterPositionInStrip, stripWidth); - if (suggestedWords.size() == 1 || getTextScaleX(centerWordView.getText(), centerWidth, + if (suggestionsCount == 1 || getTextScaleX(centerWordView.getText(), centerWidth, centerWordView.getPaint()) < MIN_TEXT_XSCALE) { // Layout only the most relevant suggested word at the center of the suggestion strip // by consolidating all slots in the strip. final int countInStrip = 1; - mMoreSuggestionsAvailable = (suggestedWords.size() > countInStrip); + mMoreSuggestionsAvailable = (suggestionsCount > countInStrip); layoutWord(mCenterPositionInStrip, stripWidth - mPadding); stripView.addView(centerWordView); setLayoutWeight(centerWordView, 1.0f, ViewGroup.LayoutParams.MATCH_PARENT); @@ -387,7 +391,7 @@ final class SuggestionStripLayoutHelper { } final int countInStrip = mSuggestionsCountInStrip; - mMoreSuggestionsAvailable = (suggestedWords.size() > countInStrip); + mMoreSuggestionsAvailable = (suggestionsCount > countInStrip); int x = 0; for (int positionInStrip = 0; positionInStrip < countInStrip; positionInStrip++) { if (positionInStrip != 0) { diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java index 34f59e8bc..7e8e55990 100644 --- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -67,7 +67,7 @@ public class CombinedFormatUtils { builder.append("," + BLACKLISTED_TAG + "=true"); } builder.append("\n"); - if (wordProperty.mShortcutTargets != null) { + if (wordProperty.mHasShortcuts) { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord); builder.append(","); @@ -75,8 +75,9 @@ public class CombinedFormatUtils { builder.append("\n"); } } - if (wordProperty.mBigrams != null) { - for (final WeightedString bigram : wordProperty.mBigrams) { + if (wordProperty.mHasNgrams) { + // TODO: Support ngram. + for (final WeightedString bigram : wordProperty.getBigrams()) { builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); builder.append(","); builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 6fd241ee9..355d00dac 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -22,23 +22,23 @@ import java.util.Locale; import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; public interface DistracterFilter { /** * Determine whether a word is a distracter to words in dictionaries. * - * @param prevWordsInfo the information of previous words. + * @param ngramContext the n-gram context * @param testedWord the word that will be tested to see whether it is a distracter to words * in dictionaries. * @param locale the locale of word. * @return true if testedWord is a distracter, otherwise false. */ - public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, + public boolean isDistracterToWordsInDictionaries(final NgramContext ngramContext, final String testedWord, final Locale locale); @UsedForTesting - public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, final Locale locale); public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes); @@ -72,13 +72,13 @@ public interface DistracterFilter { public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() { @Override - public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, + public boolean isDistracterToWordsInDictionaries(NgramContext ngramContext, String testedWord, Locale locale) { return false; } @Override - public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, + public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, final Locale locale) { return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java index f8a845304..8f0f9bb44 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java @@ -36,7 +36,7 @@ import com.android.inputmethod.keyboard.KeyboardId; import com.android.inputmethod.keyboard.KeyboardLayoutSet; import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.DictionaryFacilitatorLruCache; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.RichInputMethodSubtype; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.WordComposer; @@ -156,14 +156,14 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr /** * Determine whether a word is a distracter to words in dictionaries. * - * @param prevWordsInfo the information of previous words. Not used for now. + * @param ngramContext the n-gram context. Not used for now. * @param testedWord the word that will be tested to see whether it is a distracter to words * in dictionaries. * @param locale the locale of word. * @return true if testedWord is a distracter, otherwise false. */ @Override - public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, + public boolean isDistracterToWordsInDictionaries(final NgramContext ngramContext, final String testedWord, final Locale locale) { if (locale == null) { return false; @@ -250,7 +250,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr final SuggestionResults suggestionResults; synchronized (mLock) { suggestionResults = dictionaryFacilitator.getSuggestionResults( - composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, keyboard.getProximityInfo(), + composer, NgramContext.EMPTY_PREV_WORDS_INFO, keyboard.getProximityInfo(), settingsValuesForSuggestion, 0 /* sessionId */); } if (suggestionResults.isEmpty()) { @@ -283,7 +283,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr return false; } - private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord, + private boolean shouldBeLowerCased(final NgramContext ngramContext, final String testedWord, final Locale locale) { final DictionaryFacilitator dictionaryFacilitator = mDictionaryFacilitatorLruCache.get(locale); @@ -298,7 +298,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr return true; } if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST - && !prevWordsInfo.isValid()) { + && !ngramContext.isValid()) { // TODO: Check beginning-of-sentence. return true; } @@ -306,13 +306,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr } @Override - public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, final Locale locale) { // TODO: Use this method for user history dictionary. if (testedWord == null|| locale == null) { return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */); } - final boolean shouldBeLowerCased = shouldBeLowerCased(prevWordsInfo, testedWord, locale); + final boolean shouldBeLowerCased = shouldBeLowerCased(ngramContext, testedWord, locale); final String caseModifiedWord = shouldBeLowerCased ? testedWord.toLowerCase(locale) : testedWord; final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidWord( diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java index 349236f18..df6e97028 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java @@ -22,7 +22,7 @@ import java.util.Locale; import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; public class DistracterFilterCheckingIsInDictionary implements DistracterFilter { private final DistracterFilter mDistracterFilter; @@ -35,7 +35,7 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter } @Override - public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, + public boolean isDistracterToWordsInDictionaries(NgramContext ngramContext, String testedWord, Locale locale) { if (mDictionary.isInDictionary(testedWord)) { // This filter treats entries that are already in the dictionary as non-distracters @@ -43,14 +43,14 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter return false; } else { return mDistracterFilter.isDistracterToWordsInDictionaries( - prevWordsInfo, testedWord, locale); + ngramContext, testedWord, locale); } } @Override - public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, final Locale locale) { - return mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale); + return mDistracterFilter.getWordHandlingType(ngramContext, testedWord, locale); } @Override diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 7955541aa..73aefb821 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -21,7 +21,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.DictionaryFacilitator; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; @@ -89,7 +89,7 @@ public final class LanguageModelParam { final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>(); final int N = tokens.size(); - PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; for (int i = 0; i < N; ++i) { final String tempWord = tokens.get(i); if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) { @@ -106,7 +106,7 @@ public final class LanguageModelParam { + tempWord + "\""); } // Sentence terminator found. Split. - prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; continue; } if (DEBUG_TOKEN) { @@ -114,41 +114,41 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWordsInfo, tempWord, timestamp, locale, distracterFilter); + ngramContext, tempWord, timestamp, locale, distracterFilter); if (languageModelParam == null) { continue; } languageModelParams.add(languageModelParam); - prevWordsInfo = prevWordsInfo.getNextPrevWordsInfo( - new PrevWordsInfo.WordInfo(tempWord)); + ngramContext = ngramContext.getNextNgramContext( + new NgramContext.WordInfo(tempWord)); } return languageModelParams; } private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( - final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, + final NgramContext ngramContext, final String targetWord, final int timestamp, final Locale locale, final DistracterFilter distracterFilter) { if (locale == null) { return null; } - final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo, + final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext, targetWord, locale); final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? targetWord.toLowerCase(locale) : targetWord; - if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) { + if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) { // The word is a distracter. return null; } - return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp, + return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp, !HandlingType.shouldBeHandledAsOov(wordHandlingType)); } private static LanguageModelParam createAndGetLanguageModelParamOfWord( - final PrevWordsInfo prevWordsInfo, final String word, final int timestamp, + final NgramContext ngramContext, final String word, final int timestamp, final boolean isValidWord) { final int unigramProbability = isValidWord ? UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD; - if (!prevWordsInfo.isValid()) { + if (!ngramContext.isValid()) { if (DEBUG) { Log.d(TAG, "--- add unigram: current(" + (isValidWord ? "Valid" : "OOV") + ") = " + word); @@ -156,12 +156,12 @@ public final class LanguageModelParam { return new LanguageModelParam(word, unigramProbability, timestamp); } if (DEBUG) { - Log.d(TAG, "--- add bigram: prev = " + prevWordsInfo + ", current(" + Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current(" + (isValidWord ? "Valid" : "OOV") + ") = " + word); } final int bigramProbability = isValidWord ? BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD; - return new LanguageModelParam(prevWordsInfo.mPrevWordsInfo[0].mWord, word, + return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word, unigramProbability, bigramProbability, timestamp); } } diff --git a/java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java index 5720d9388..34eeac2c2 100644 --- a/java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java @@ -20,12 +20,12 @@ import java.util.Arrays; import java.util.regex.Pattern; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.PrevWordsInfo; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.NgramContext; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; -public final class PrevWordsInfoUtils { - private PrevWordsInfoUtils() { +public final class NgramContextUtils { + private NgramContextUtils() { // Intentional empty constructor for utility class. } @@ -44,7 +44,7 @@ public final class PrevWordsInfoUtils { // (n = 2) "abc def|" -> beginning-of-sentence, abc // (n = 2) "abc def |" -> beginning-of-sentence, abc // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot - // represent this situation using PrevWordsInfo. See TODO in the method. + // represent this situation using NgramContext. See TODO in the method. // TODO: The next example's result should be "abc, def". This have to be fixed before we // retrieve the prior context of Beginning-of-Sentence. // (n = 2) "abc def. |" -> beginning-of-sentence, abc @@ -52,9 +52,9 @@ public final class PrevWordsInfoUtils { // (n = 2) "abc|" -> beginning-of-sentence // (n = 2) "abc |" -> beginning-of-sentence // (n = 2) "abc. def|" -> beginning-of-sentence - public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev, + public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev, final SpacingAndPunctuations spacingAndPunctuations, final int n) { - if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO; final String[] w = SPACE_REGEX.split(prev); final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO); @@ -98,6 +98,6 @@ public final class PrevWordsInfoUtils { } prevWordsInfo[i] = new WordInfo(focusedWord); } - return new PrevWordsInfo(prevWordsInfo); + return new NgramContext(prevWordsInfo); } } diff --git a/java/src/com/android/inputmethod/latin/utils/SuggestionResults.java b/java/src/com/android/inputmethod/latin/utils/SuggestionResults.java index d6f644228..4e2e396c2 100644 --- a/java/src/com/android/inputmethod/latin/utils/SuggestionResults.java +++ b/java/src/com/android/inputmethod/latin/utils/SuggestionResults.java @@ -31,7 +31,7 @@ import java.util.TreeSet; public final class SuggestionResults extends TreeSet<SuggestedWordInfo> { public final ArrayList<SuggestedWordInfo> mRawSuggestions; // TODO: Instead of a boolean , we may want to include the context of this suggestion results, - // such as {@link PrevWordsInfo}. + // such as {@link NgramContext}. public final boolean mIsBeginningOfSentence; private final int mCapacity; diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index e420f8056..19aeb2895 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -403,10 +403,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j jsize wordLength = env->GetArrayLength(word); int wordCodePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); - // Use 1 for count to indicate the bigram has inputted. - const BigramProperty bigramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(), + // Use 1 for count to indicate the ngram has inputted. + const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(), probability, timestamp, 0 /* level */, 1 /* count */); - return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); + return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty); } static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, @@ -501,12 +501,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); // Use 1 for count to indicate the bigram has inputted. - const BigramProperty bigramProperty( + const NgramProperty ngramProperty( CodePointArrayView(word1CodePoints, word1Length).toVector(), bigramProbability, timestamp, 0 /* level */, 1 /* count */); const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isBeginningOfSentence */); - dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); + dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty); } if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { return i + 1; @@ -603,6 +603,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } while (token != 0); // Add bigrams. + // TODO: Support ngrams. do { token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); const WordProperty wordProperty = dictionary->getWordProperty( @@ -617,10 +618,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount, wordProperty.getUnigramProperty()->representsBeginningOfSentence()); - for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { + for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, - &bigramProperty)) { - LogUtils::logToJava(env, "Cannot add bigram to the new dict."); + &ngramProperty)) { + LogUtils::logToJava(env, "Cannot add ngram to the new dict."); return false; } } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index a3bb408c3..c3f422916 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -144,9 +144,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) { } bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); + return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty); } bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 324e3504a..09f8eaceb 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -85,7 +85,7 @@ class Dictionary { bool removeUnigramEntry(const CodePointArrayView codePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView codePoints); diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h index 9e0baa032..5d822fa3b 100644 --- a/native/jni/src/suggest/core/dictionary/property/bigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_BIGRAM_PROPERTY_H -#define LATINIME_BIGRAM_PROPERTY_H +#ifndef LATINIME_NGRAM_PROPERTY_H +#define LATINIME_NGRAM_PROPERTY_H #include <vector> @@ -23,10 +23,9 @@ namespace latinime { -// TODO: Change to NgramProperty. -class BigramProperty { +class NgramProperty { public: - BigramProperty(const std::vector<int> &&targetCodePoints, const int probability, + NgramProperty(const std::vector<int> &&targetCodePoints, const int probability, const int timestamp, const int level, const int count) : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), mTimestamp(timestamp), mLevel(level), mCount(count) {} @@ -53,7 +52,7 @@ class BigramProperty { private: // Default copy constructor and assign operator are used for using in std::vector. - DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty); + DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty); // TODO: Make members const. std::vector<int> mTargetCodePoints; @@ -63,4 +62,4 @@ class BigramProperty { int mCount; }; } // namespace latinime -#endif // LATINIME_WORD_PROPERTY_H +#endif // LATINIME_NGRAM_PROPERTY_H diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 66daf3e3f..e3406fac5 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -28,7 +28,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), false /* needsNullTermination */); jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), - !mBigrams.empty(), mUnigramProperty.hasShortcuts(), + !mNgrams.empty(), mUnigramProperty.hasShortcuts(), mUnigramProperty.representsBeginningOfSentence()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(), @@ -42,8 +42,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); // Output bigrams. - for (const auto &bigramProperty : mBigrams) { - const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); + // TODO: Support n-gram + for (const auto &ngramProperty : mNgrams) { + const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), @@ -51,9 +52,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); - int bigramProbabilityInfo[] = {bigramProperty.getProbability(), - bigramProperty.getTimestamp(), bigramProperty.getLevel(), - bigramProperty.getCount()}; + int bigramProbabilityInfo[] = {ngramProperty.getProbability(), + ngramProperty.getTimestamp(), ngramProperty.getLevel(), + ngramProperty.getCount()}; jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index 4e6febb3f..0c23e8225 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -21,7 +21,7 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" namespace latinime { @@ -31,12 +31,12 @@ class WordProperty { public: // Default constructor is used to create an instance that indicates an invalid word. WordProperty() - : mCodePoints(), mUnigramProperty(), mBigrams() {} + : mCodePoints(), mUnigramProperty(), mNgrams() {} WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector<BigramProperty> *const bigrams) + const std::vector<NgramProperty> *const bigrams) : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mBigrams(*bigrams) {} + mNgrams(*bigrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, @@ -46,8 +46,8 @@ class WordProperty { return &mUnigramProperty; } - const std::vector<BigramProperty> *getBigramProperties() const { - return &mBigrams; + const std::vector<NgramProperty> *getNgramProperties() const { + return &mNgrams; } private: @@ -56,7 +56,7 @@ class WordProperty { const std::vector<int> mCodePoints; const UnigramProperty mUnigramProperty; - const std::vector<BigramProperty> mBigrams; + const std::vector<NgramProperty> mNgrams; }; } // namespace latinime #endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 1546b2610..f4b97989f 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -81,7 +81,7 @@ class DictionaryStructureWithBufferPolicy { // Returns whether the update was success or not. virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) = 0; + const NgramProperty *const ngramProperty) = 0; // Returns whether the update was success or not. virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp index 3e8e059f2..c6397706d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp @@ -24,7 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" @@ -60,7 +60,7 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out } bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { // 1. The word has no bigrams yet. // 2. The word has bigrams, and there is the target in the list. // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. @@ -79,7 +79,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, - bigramProperty); + ngramProperty); // Write an entry. const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { @@ -112,7 +112,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, bigramProperty); + &newBigramEntry, ngramProperty); if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { return false; } @@ -138,7 +138,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry updatedBigramEntry = originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, bigramProperty); + &updatedBigramEntry, ngramProperty); return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } @@ -264,18 +264,18 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const BigramEntry *const originalBigramEntry, - const BigramProperty *const bigramProperty) const { + const NgramProperty *const ngramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), - bigramProperty->getLevel(), bigramProperty->getCount()); + const HistoricalInfo historicalInfoForUpdate(ngramProperty->getTimestamp(), + ngramProperty->getLevel(), ngramProperty->getCount()); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), + originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); } else { - return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); + return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h index 50a4c9743..aac6f5470 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h @@ -36,7 +36,7 @@ namespace v402 { class BigramDictContent; } // namespace v402 } // namespace backward -class BigramProperty; +class NgramProperty; namespace backward { namespace v402 { } // namespace v402 @@ -64,7 +64,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { } bool addNewEntry(const int terminalId, const int newTargetTerminalId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); bool removeEntry(const int terminalId, const int targetTerminalId); @@ -80,7 +80,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { int *const outTailEntryPos) const; const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, - const BigramProperty *const bigramProperty) const; + const NgramProperty *const ngramProperty) const; bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp index 97a8bcc98..6827c3ee3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp @@ -232,8 +232,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( } bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { - if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) { + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { + if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, ngramProperty, outAddedNewEntry)) { AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d", prevWordIds[0], wordId); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h index 9d8a55bff..d0bab50f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h @@ -94,7 +94,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index f752f89f1..9631cf1ae 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -30,7 +30,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/session/prev_words_info.h" @@ -312,7 +312,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod } bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -326,9 +326,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert the ngram to the dictionary. " - "length: %zd", bigramProperty->getTargetCodePoints()->size()); + "length: %zd", ngramProperty->getTargetCodePoints()->size()); return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; @@ -356,7 +356,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } } const int wordPos = getTerminalPtNodePosFromWordId(getWordId( - CodePointArrayView(*bigramProperty->getTargetCodePoints()), + CodePointArrayView(*ngramProperty->getTargetCodePoints()), false /* forceLowerCaseSearch */)); if (wordPos == NOT_A_DICT_POS) { return false; @@ -364,7 +364,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI bool addedNewBigram = false; const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), - wordPos, bigramProperty, &addedNewBigram)) { + wordPos, ngramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -499,7 +499,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); if (bigramListPos != NOT_A_DICT_POS) { int bigramWord1CodePoints[MAX_WORD_LENGTH]; @@ -526,7 +526,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( ForgettingCurveUtils::decodeProbability( bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getProbability(); - bigrams.emplace_back( + ngrams.emplace_back( CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount()); @@ -554,7 +554,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 8420c94d0..324a53e62 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -59,6 +59,7 @@ namespace backward { namespace v402 { // Word id = Position of a PtNode that represents the word. +// Max supported n-gram is bigram. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) @@ -112,7 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h index 2aa402748..b8a4a92e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h @@ -76,6 +76,7 @@ class DynamicPtGcEventListeners { int mValidUnigramCount; }; + // TODO: Remove when we stop supporting v402 format. // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // entries. class TraversePolicyToUpdateBigramProbability diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index 3b58d7d6d..92fd6f214 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -82,7 +82,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readi } bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, - const int wordPos, const BigramProperty *const bigramProperty, + const int wordPos, const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { if (prevWordsPtNodePos.empty()) { return false; @@ -96,7 +96,7 @@ bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPt const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); const int wordId = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); - return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry); + return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, ngramProperty, outAddedNewEntry); } bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index 710047e8c..2bbe2f4dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -23,7 +23,7 @@ namespace latinime { -class BigramProperty; +class NgramProperty; class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; @@ -46,7 +46,7 @@ class DynamicPtUpdatingHelper { // TODO: Remove after stopping supporting v402. // Add an n-gram entry. bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); // TODO: Remove after stopping supporting v402. // Remove an n-gram entry. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index 955d779ac..954db9b0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -25,7 +25,7 @@ namespace latinime { -class BigramProperty; +class NgramProperty; class UnigramProperty; // Interface class used to write PtNode information. @@ -72,7 +72,7 @@ class PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0; + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) = 0; virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 13cf9a5a8..49095d0e9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -436,7 +436,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( const PtNodeParams ptNodeParams = mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); // Fetch bigram information. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); @@ -450,7 +450,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability); const int probability = getProbability(word1Probability, bigramsIt.getProbability()); - bigrams.emplace_back( + ngrams.emplace_back( CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */); } @@ -478,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 0d679c5dc..a912d03be 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -38,6 +38,7 @@ class DicNode; class DicNodeVector; // Word id = Position of a PtNode that represents the word. +// Max supported n-gram is bigram. class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) @@ -93,7 +94,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h index e1e10ca17..fd52e574c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h @@ -21,7 +21,7 @@ #include <cstdint> #include "defines.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/historical_info.h" @@ -56,12 +56,12 @@ class ProbabilityEntry { mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(), unigramProperty->getCount()) {} - // Create from bigram property. + // Create from ngram property. // TODO: Set flags. - ProbabilityEntry(const BigramProperty *const bigramProperty) - : mFlags(0), mProbability(bigramProperty->getProbability()), - mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(), - bigramProperty->getCount()) {} + ProbabilityEntry(const NgramProperty *const ngramProperty) + : mFlags(0), mProbability(ngramProperty->getProbability()), + mHistoricalInfo(ngramProperty->getTimestamp(), ngramProperty->getLevel(), + ngramProperty->getCount()) {} bool isValid() const { return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index a1a33d27a..f13512d5a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -61,6 +61,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( } } +// TODO: Quit using bigramLinkedNodePos. bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos) { @@ -208,15 +209,16 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( terminalId, &probabilityEntryToWrite); } +// TODO: Support counting ngram entries. bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { + const NgramProperty *const ngramProperty, bool *const outAddedNewBigram) { LanguageModelDictContent *const languageModelDictContent = mBuffers->getMutableLanguageModelDictContent(); const ProbabilityEntry probabilityEntry = languageModelDictContent->getNgramProbabilityEntry(prevWordIds, wordId); - const ProbabilityEntry probabilityEntryOfBigramProperty(bigramProperty); + const ProbabilityEntry probabilityEntryOfNgramProperty(ngramProperty); const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom( - &probabilityEntry, &probabilityEntryOfBigramProperty); + &probabilityEntry, &probabilityEntryOfNgramProperty); if (!languageModelDictContent->setNgramProbabilityEntry( prevWordIds, wordId, &updatedProbabilityEntry)) { AKLOGE("Cannot add new ngram entry. prevWordId[0]: %d, prevWordId.size(): %zd, wordId: %d", diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 17915273b..ea4f09904 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -74,7 +74,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0f0696410..d46acf594 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -23,7 +23,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/session/prev_words_info.h" @@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod } bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -280,9 +280,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert the ngram to the dictionary. " - "length: %zd", bigramProperty->getTargetCodePoints()->size()); + "length: %zd", ngramProperty->getTargetCodePoints()->size()); return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; @@ -311,13 +311,13 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI // Refresh word ids. prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); } - const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()), + const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()), false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) { return false; } bool addedNewEntry = false; - if (mNodeWriter.addNgramEntry(prevWordIds, wordId, bigramProperty, &addedNewEntry)) { + if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) { if (addedNewEntry) { mBigramCount++; } @@ -451,7 +451,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. // TODO: Support n-gram. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId); int bigramWord1CodePoints[MAX_WORD_LENGTH]; for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries( @@ -463,7 +463,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) : probabilityEntry.getProbability(); - bigrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), + ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount()); } @@ -489,7 +489,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(), probabilityEntry.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index c9bde2cf5..598122bf2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -37,6 +37,7 @@ namespace latinime { class DicNode; class DicNodeVector; +// TODO: Support counting ngram entries. // Word id = Artificial id that is stored in the PtNode looked up by the word. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: @@ -92,7 +93,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 442abadee..e1ff973de 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -114,14 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } - readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability - traversePolicyToUpdateBigramProbability(&ptNodeWriter); - if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &traversePolicyToUpdateBigramProbability)) { - return false; - } - // Mapping from positions in mBuffer to positions in bufferToWrite. PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h index b6278c4cb..3569d0576 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h @@ -33,6 +33,7 @@ class Ver4PatriciaTrieWritingHelper { Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) : mBuffers(buffers) {} + // TODO: Support counting ngram entries. bool writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const; @@ -70,11 +71,6 @@ class Ver4PatriciaTrieWritingHelper { Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, int *const outBigramCount); - bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount); - - bool truncateBigrams(const int maxBigramCount); - Ver4DictBuffers *const mBuffers; }; } // namespace latinime diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index f3bbe4ad4..8d5d6ccec 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -20,7 +20,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.util.Pair; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.DictDecoder; @@ -78,13 +78,13 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, final String word1, final int probability) { - binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability, + binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability, mCurrentTime /* timestamp */); } private static boolean isValidBigram(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1); + return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); } private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { @@ -661,31 +661,31 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, mCurrentTime); - final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); - binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", DUMMY_PROBABILITY, mCurrentTime); - assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); - binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", DUMMY_PROBABILITY, mCurrentTime); addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY); - binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", DUMMY_PROBABILITY, mCurrentTime); - assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); - assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); + assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); forcePassingLongTime(binaryDictionary); - assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); - assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); + assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); - binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", DUMMY_PROBABILITY, mCurrentTime); addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY); - binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", DUMMY_PROBABILITY, mCurrentTime); - assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa")); - assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb")); + assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); + assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); binaryDictionary.close(); dictFile.delete(); } diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 4025744f8..9c7792cf2 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,7 +21,7 @@ import android.test.suitebuilder.annotation.LargeTest; import android.text.TextUtils; import android.util.Pair; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.WeightedString; @@ -208,45 +208,45 @@ public class BinaryDictionaryTests extends AndroidTestCase { private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, final String word1, final int probability) { - binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability, + binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0, final String word1, final String word2, final int probability) { - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); - binaryDictionary.addNgramEntry(prevWordsInfo, word2, probability, + final NgramContext ngramContext = + new NgramContext(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); + binaryDictionary.addNgramEntry(ngramContext, word2, probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } private static boolean isValidBigram(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1); + return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); } private static void removeBigramEntry(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1); + binaryDictionary.removeNgramEntry(new NgramContext(new WordInfo(word0)), word1); } private static void removeTrigramEntry(final BinaryDictionary binaryDictionary, final String word0, final String word1, final String word2) { - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); - binaryDictionary.removeNgramEntry(prevWordsInfo, word2); + final NgramContext ngramContext = + new NgramContext(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); + binaryDictionary.removeNgramEntry(ngramContext, word2); } private static int getBigramProbability(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1); + return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1); } private static int getTrigramProbability(final BinaryDictionary binaryDictionary, final String word0, final String word1, final String word2) { - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); - return binaryDictionary.getNgramProbability(prevWordsInfo, word2); + final NgramContext ngramContext = + new NgramContext(new WordInfo[] { new WordInfo(word1), new WordInfo(word0) } ); + return binaryDictionary.getNgramProbability(ngramContext, word2); } public void testAddUnigramWord() { @@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertTrue(wordProperty.isValid()); assertEquals(isNotAWord, wordProperty.mIsNotAWord); assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); - assertEquals(false, wordProperty.mHasBigrams); + assertEquals(false, wordProperty.mHasNgrams); assertEquals(false, wordProperty.mHasShortcuts); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); assertTrue(wordProperty.mShortcutTargets.isEmpty()); @@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { final HashSet<String> bigramWord1s = bigrams.get(word0); final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, false /* isBeginningOfSentence */); - assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size()); + // TODO: Support ngram. + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; assertTrue(bigramWord1s.contains(word1)); if (canCheckBigramProbability(formatVersion)) { final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + assertEquals(bigramProbability, bigramTarget.getProbability()); } } } @@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { wordProperty.mProbabilityInfo.mProbability); wordSet.remove(word0); final HashSet<String> bigramWord1s = bigrams.get(word0); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + // TODO: Support ngram. + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; assertTrue(bigramWord1s.contains(word1)); final Pair<String, String> bigram = new Pair<>(word0, word1); if (canCheckBigramProbability(formatVersion)) { final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + assertEquals(bigramProbability, bigramTarget.getProbability()); } bigramSet.remove(bigram); } @@ -1422,7 +1424,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); - binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, + binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE, "aaa", bigramProbability, 0 /* timestamp */); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); @@ -1436,7 +1438,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (canCheckBigramProbability(toFormatVersion)) { assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); assertEquals(bigramProbability, binaryDictionary.getNgramProbability( - PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); + NgramContext.BEGINNING_OF_SENTENCE, "aaa")); } assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", @@ -1546,23 +1548,23 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int dummyProbability = 0; - final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; final int bigramProbability = 200; addUnigramWord(binaryDictionary, "aaa", dummyProbability); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); addUnigramWord(binaryDictionary, "bbb", dummyProbability); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); binaryDictionary.flushWithGC(); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb")); } public void testGetMaxFrequencyOfExactMatches() { diff --git a/tests/src/com/android/inputmethod/latin/BlueUnderlineTests.java b/tests/src/com/android/inputmethod/latin/BlueUnderlineTests.java index 6e894decf..30b088137 100644 --- a/tests/src/com/android/inputmethod/latin/BlueUnderlineTests.java +++ b/tests/src/com/android/inputmethod/latin/BlueUnderlineTests.java @@ -63,7 +63,7 @@ public class BlueUnderlineTests extends InputTestsBase { final int typedLength = STRING_TO_TYPE.length(); final int EXPECTED_SUGGESTION_SPAN_START = -1; final int EXPECTED_UNDERLINE_SPAN_START = 0; - final int EXPECTED_UNDERLINE_SPAN_END = 4; + final int EXPECTED_UNDERLINE_SPAN_END = 3; type(STRING_TO_TYPE); sleep(DELAY_TO_WAIT_FOR_UNDERLINE); runMessages(); diff --git a/tests/src/com/android/inputmethod/latin/InputLogicTests.java b/tests/src/com/android/inputmethod/latin/InputLogicTests.java index 59b858dbd..ec249dab3 100644 --- a/tests/src/com/android/inputmethod/latin/InputLogicTests.java +++ b/tests/src/com/android/inputmethod/latin/InputLogicTests.java @@ -159,8 +159,11 @@ public class InputLogicTests extends InputTestsBase { } public void testAutoCorrectWithSpaceThenRevert() { + // Backspacing to cancel the "tgis"->"this" autocorrection should result in + // a "phantom space": if the user presses space immediately after, + // only one space will be inserted in total. final String STRING_TO_TYPE = "tgis "; - final String EXPECTED_RESULT = "tgis "; + final String EXPECTED_RESULT = "tgis"; type(STRING_TO_TYPE); mLatinIME.onUpdateSelection(0, 0, STRING_TO_TYPE.length(), STRING_TO_TYPE.length(), -1, -1); type(Constants.CODE_DELETE); @@ -168,6 +171,24 @@ public class InputLogicTests extends InputTestsBase { mEditText.getText().toString()); } + public void testAutoCorrectWithSpaceThenRevertThenTypeMore() { + final String STRING_TO_TYPE_FIRST = "tgis "; + final String STRING_TO_TYPE_SECOND = "a"; + final String EXPECTED_RESULT = "tgis a"; + type(STRING_TO_TYPE_FIRST); + mLatinIME.onUpdateSelection(0, 0, + STRING_TO_TYPE_FIRST.length(), STRING_TO_TYPE_FIRST.length(), -1, -1); + type(Constants.CODE_DELETE); + + type(STRING_TO_TYPE_SECOND); + mLatinIME.onUpdateSelection(STRING_TO_TYPE_FIRST.length(), STRING_TO_TYPE_FIRST.length(), + STRING_TO_TYPE_FIRST.length() - 1 + STRING_TO_TYPE_SECOND.length(), + STRING_TO_TYPE_FIRST.length() - 1 + STRING_TO_TYPE_SECOND.length(), + -1, -1); + assertEquals("auto-correct with space then revert then type more", EXPECTED_RESULT, + mEditText.getText().toString()); + } + public void testAutoCorrectToSelfDoesNotRevert() { final String STRING_TO_TYPE = "this "; final String EXPECTED_RESULT = "this"; diff --git a/tests/src/com/android/inputmethod/latin/NgramContextTests.java b/tests/src/com/android/inputmethod/latin/NgramContextTests.java new file mode 100644 index 000000000..ecc2c634d --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/NgramContextTests.java @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import com.android.inputmethod.latin.NgramContext.WordInfo; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.SmallTest; + +@SmallTest +public class NgramContextTests extends AndroidTestCase { + public void testConstruct() { + assertEquals(new NgramContext(new WordInfo("a")), new NgramContext(new WordInfo("a"))); + assertEquals(new NgramContext(WordInfo.BEGINNING_OF_SENTENCE), + new NgramContext(WordInfo.BEGINNING_OF_SENTENCE)); + assertEquals(new NgramContext(WordInfo.EMPTY_WORD_INFO), + new NgramContext(WordInfo.EMPTY_WORD_INFO)); + assertEquals(new NgramContext(WordInfo.EMPTY_WORD_INFO), + new NgramContext(WordInfo.EMPTY_WORD_INFO)); + } + + public void testIsBeginningOfSentenceContext() { + assertFalse(new NgramContext().isBeginningOfSentenceContext()); + assertTrue(new NgramContext(WordInfo.BEGINNING_OF_SENTENCE) + .isBeginningOfSentenceContext()); + assertTrue(NgramContext.BEGINNING_OF_SENTENCE.isBeginningOfSentenceContext()); + assertFalse(new NgramContext(new WordInfo("a")).isBeginningOfSentenceContext()); + assertFalse(new NgramContext(new WordInfo("")).isBeginningOfSentenceContext()); + assertFalse(new NgramContext(WordInfo.EMPTY_WORD_INFO).isBeginningOfSentenceContext()); + assertTrue(new NgramContext(WordInfo.BEGINNING_OF_SENTENCE, new WordInfo("a")) + .isBeginningOfSentenceContext()); + assertFalse(new NgramContext(new WordInfo("a"), WordInfo.BEGINNING_OF_SENTENCE) + .isBeginningOfSentenceContext()); + assertFalse(new NgramContext(WordInfo.EMPTY_WORD_INFO, WordInfo.BEGINNING_OF_SENTENCE) + .isBeginningOfSentenceContext()); + } + + public void testGetNextNgramContext() { + final NgramContext ngramContext_a = new NgramContext(new WordInfo("a")); + final NgramContext ngramContext_b_a = + ngramContext_a.getNextNgramContext(new WordInfo("b")); + assertEquals("b", ngramContext_b_a.getNthPrevWord(1)); + assertEquals("a", ngramContext_b_a.getNthPrevWord(2)); + final NgramContext ngramContext_bos_b = + ngramContext_b_a.getNextNgramContext(WordInfo.BEGINNING_OF_SENTENCE); + assertTrue(ngramContext_bos_b.isBeginningOfSentenceContext()); + assertEquals("b", ngramContext_bos_b.getNthPrevWord(2)); + final NgramContext ngramContext_c_bos = + ngramContext_b_a.getNextNgramContext(new WordInfo("c")); + assertEquals("c", ngramContext_c_bos.getNthPrevWord(1)); + } +} diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java index f9d72269e..7a3233625 100644 --- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java @@ -30,9 +30,8 @@ import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.InputConnection; import android.view.inputmethod.InputConnectionWrapper; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; -import com.android.inputmethod.latin.utils.PrevWordsInfoUtils; +import com.android.inputmethod.latin.utils.NgramContextUtils; import com.android.inputmethod.latin.utils.RunInLocale; import com.android.inputmethod.latin.utils.ScriptUtils; import com.android.inputmethod.latin.utils.StringUtils; @@ -158,26 +157,25 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { */ public void testGetPreviousWord() { // If one of the following cases breaks, the bigram suggestions won't work. - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc. def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - - assertFalse(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); - assertTrue(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 2).getNthPrevWord(1), "abc"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc", mSpacingAndPunctuations, 2), NgramContext.BEGINNING_OF_SENTENCE); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc. def", mSpacingAndPunctuations, 2), NgramContext.BEGINNING_OF_SENTENCE); + + assertFalse(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 2).isBeginningOfSentenceContext()); + assertTrue(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc", mSpacingAndPunctuations, 2).isBeginningOfSentenceContext()); // For n-gram - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1], - WordInfo.BEGINNING_OF_SENTENCE); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 1).getNthPrevWord(1), "def"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 1).getNthPrevWord(2), "abc"); + assertTrue(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 2).isNthPrevWordBeginningOfSontence(2)); // The following tests reflect the current behavior of the function // RichInputConnection#getNthPreviousWord. @@ -186,33 +184,33 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { // this function if needed - especially since it does not seem very // logical. These tests are just there to catch any unintentional // changes in the behavior of the RichInputConnection#getPreviousWord method. - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def ", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def.", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def .", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "def"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def ", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc 'def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "'def"); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( - "abc 'def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def ", mSpacingAndPunctuations, 2).getNthPrevWord(1), "abc"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def.", mSpacingAndPunctuations, 2).getNthPrevWord(1), "abc"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def .", mSpacingAndPunctuations, 2).getNthPrevWord(1), "def"); + assertTrue(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc ", mSpacingAndPunctuations, 2).isBeginningOfSentenceContext()); + + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 1).getNthPrevWord(1), "def"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def ", mSpacingAndPunctuations, 1).getNthPrevWord(1), "def"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc 'def", mSpacingAndPunctuations, 1).getNthPrevWord(1), "'def"); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def.", mSpacingAndPunctuations, 1), NgramContext.BEGINNING_OF_SENTENCE); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc def .", mSpacingAndPunctuations, 1), NgramContext.BEGINNING_OF_SENTENCE); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc, def", mSpacingAndPunctuations, 2), NgramContext.EMPTY_PREV_WORDS_INFO); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc? def", mSpacingAndPunctuations, 2), NgramContext.EMPTY_PREV_WORDS_INFO); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc! def", mSpacingAndPunctuations, 2), NgramContext.EMPTY_PREV_WORDS_INFO); + assertEquals(NgramContextUtils.getNgramContextFromNthPreviousWord( + "abc 'def", mSpacingAndPunctuations, 2), NgramContext.EMPTY_PREV_WORDS_INFO); } public void testGetWordRangeAtCursor() { diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 406046a74..f8b68e0ce 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertTrue(shortcutList.isEmpty()); } - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; final Pair<String, String> bigram = new Pair<>(word0, word1); assertTrue(bigramSet.contains(bigram)); bigramSet.remove(bigram); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 2b3fd892a..12290e6aa 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -27,6 +27,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map.Entry; /** * Encodes binary files for a FusionDictionary. @@ -791,10 +793,12 @@ public class BinaryDictEncoderUtils { * @param destination the stream to write the file header to. * @param dict the dictionary to write. * @param formatOptions file format options. + * @param codePointOccurrenceArray code points ordered by occurrence count. * @return the size of the header. */ /* package */ static int writeDictionaryHeader(final OutputStream destination, - final FusionDictionary dict, final FormatOptions formatOptions) + final FusionDictionary dict, final FormatOptions formatOptions, + final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray) throws IOException, UnsupportedFormatException { final int version = formatOptions.mVersion; if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION @@ -833,6 +837,9 @@ public class BinaryDictEncoderUtils { CharEncoding.writeString(headerBuffer, key); CharEncoding.writeString(headerBuffer, value); } + + // TODO: Write out the code point table. + final int size = headerBuffer.size(); final byte[] bytes = headerBuffer.toByteArray(); // Write out the header size. @@ -845,4 +852,15 @@ public class BinaryDictEncoderUtils { headerBuffer.close(); return size; } + + static final class CodePointTable { + final HashMap<Integer, Integer> mCodePointToOneByteCodeMap; + final ArrayList<Entry<Integer, Integer>> mCodePointOccurrenceArray; + + CodePointTable(final HashMap<Integer, Integer> codePointToOneByteCodeMap, + final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray) { + mCodePointToOneByteCodeMap = codePointToOneByteCodeMap; + mCodePointOccurrenceArray = codePointOccurrenceArray; + } + } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index 65b84d5f7..18f4bcf5f 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder { } // Insert bigrams into the fusion dictionary. for (final WordProperty wordProperty : wordProperties) { - if (wordProperty.mBigrams == null) { + if (!wordProperty.mHasNgrams) { continue; } final String word0 = wordProperty.mWord; - for (final WeightedString bigram : wordProperty.mBigrams) { + for (final WeightedString bigram : wordProperty.getBigrams()) { fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index 0fa75e8ee..c47190190 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -28,7 +29,11 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map.Entry; /** * An implementation of DictEncoder for version 2 binary dictionary. @@ -73,6 +78,49 @@ public class Ver2DictEncoder implements DictEncoder { } } + // Package for testing + static CodePointTable makeCodePointTable(final FusionDictionary dict) { + final HashMap<Integer, Integer> codePointOccurrenceCounts = new HashMap<>(); + for (final WordProperty word : dict) { + // Store per code point occurrence + final String wordString = word.mWord; + for (int i = 0; i < wordString.length(); ++i) { + final int codePoint = Character.codePointAt(wordString, i); + if (codePointOccurrenceCounts.containsKey(codePoint)) { + codePointOccurrenceCounts.put(codePoint, + codePointOccurrenceCounts.get(codePoint) + 1); + } else { + codePointOccurrenceCounts.put(codePoint, 1); + } + } + } + final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray = + new ArrayList<>(codePointOccurrenceCounts.entrySet()); + // Descending order sort by occurrence (value side) + Collections.sort(codePointOccurrenceArray, new Comparator<Entry<Integer, Integer>>() { + @Override + public int compare(final Entry<Integer, Integer> a, final Entry<Integer, Integer> b) { + if (a.getValue() != b.getValue()) { + return b.getValue().compareTo(a.getValue()); + } + return b.getKey().compareTo(a.getKey()); + } + }); + int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE; + // Temporary map for writing of nodes + final HashMap<Integer, Integer> codePointToOneByteCodeMap = new HashMap<>(); + for (final Entry<Integer, Integer> entry : codePointOccurrenceArray) { + // Put a relation from the original code point to the one byte code. + codePointToOneByteCodeMap.put(entry.getKey(), currentCodePointTableIndex); + if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) { + break; + } + } + // codePointToOneByteCodeMap for writing the trie + // codePointOccurrenceArray for writing the header + return new CodePointTable(codePointToOneByteCodeMap, codePointOccurrenceArray); + } + @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { @@ -85,7 +133,12 @@ public class Ver2DictEncoder implements DictEncoder { if (mOutStream == null) { openStream(); } - BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions); + + // Make code point conversion table ordered by occurrence of code points + final CodePointTable codePointTable = makeCodePointTable(dict); + + BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions, + codePointTable.mCodePointOccurrenceArray); // Addresses are limited to 3 bytes, but since addresses can be relative to each node // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java new file mode 100644 index 000000000..9104c2fcb --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map.Entry; + +import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Log; + +/** + * Unit tests for Ver2DictEncoder + */ +@LargeTest +public class Ver2DictEncoderTests extends AndroidTestCase { + private static final String TAG = Ver2DictEncoderTests.class.getSimpleName(); + private static final int UNIGRAM_FREQ = 10; + + public void testCodePointTable() { + final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"}; + final List<String> words = Arrays.asList(wordSource); + final String correctCodePointTable = "eotdsanirfg bclwup"; + final String correctCodePointOccurrenceArrayString = + "11641114101411531003114211021052972119111711121108110311021991981321"; + final String correctCodePointExpectedMapString = "343332363540383937464549484744414243"; + final String dictName = "codePointTableTest"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + + final FormatSpec.FormatOptions formatOptions = + new FormatSpec.FormatOptions(FormatSpec.VERSION2); + final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(), + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); + addUnigrams(sourcedict, words, null /* shortcutMap */); + final CodePointTable codePointTable = Ver2DictEncoder.makeCodePointTable(sourcedict); + + // Check if mCodePointOccurrenceArray is correct + final StringBuilder codePointOccurrenceArrayString = new StringBuilder(); + for (Entry<Integer, Integer> entry : codePointTable.mCodePointOccurrenceArray) { + codePointOccurrenceArrayString.append(entry.getKey()); + codePointOccurrenceArrayString.append(entry.getValue()); + } + assertEquals(correctCodePointOccurrenceArrayString, + codePointOccurrenceArrayString.toString()); + + // Check if mCodePointToOneByteCodeMap is correct + final StringBuilder codePointExpectedMapString = new StringBuilder(); + for (int i = 0; i < correctCodePointTable.length(); ++i) { + codePointExpectedMapString.append(codePointTable.mCodePointToOneByteCodeMap.get( + correctCodePointTable.codePointAt(i))); + } + assertEquals(correctCodePointExpectedMapString, codePointExpectedMapString.toString()); + } + + /** + * Adds unigrams to the dictionary. + */ + private void addUnigrams(final FusionDictionary dict, final List<String> words, + final HashMap<String, List<String>> shortcutMap) { + for (final String word : words) { + final ArrayList<WeightedString> shortcuts = new ArrayList<>(); + if (shortcutMap != null && shortcutMap.containsKey(word)) { + for (final String shortcut : shortcutMap.get(word)) { + shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); + } + } + dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), + (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); + } + } +} diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 5e8417ed6..0da915a75 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } } // Insert bigrams into the fusion dictionary. + // TODO: Support ngrams. for (final WordProperty wordProperty : wordProperties) { - if (wordProperty.mBigrams == null) { + if (!wordProperty.mHasNgrams) { continue; } final String word0 = wordProperty.mWord; - for (final WeightedString bigram : wordProperty.mBigrams) { + for (final WeightedString bigram : wordProperty.getBigrams()) { fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 2e5435159..401ffde6d 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; @@ -102,14 +102,15 @@ public class Ver4DictEncoder implements DictEncoder { } } for (final WordProperty word0Property : dict) { - if (null == word0Property.mBigrams) continue; - for (final WeightedString word1 : word0Property.mBigrams) { - final PrevWordsInfo prevWordsInfo = - new PrevWordsInfo(new PrevWordsInfo.WordInfo(word0Property.mWord)); - if (!binaryDict.addNgramEntry(prevWordsInfo, word1.mWord, + if (!word0Property.mHasNgrams) continue; + // TODO: Support ngram. + for (final WeightedString word1 : word0Property.getBigrams()) { + final NgramContext ngramContext = + new NgramContext(new NgramContext.WordInfo(word0Property.mWord)); + if (!binaryDict.addNgramEntry(ngramContext, word1.mWord, word1.getProbability(), 0 /* timestamp */)) { MakedictLog.e("Cannot add n-gram entry for " - + prevWordsInfo + " -> " + word1.mWord); + + ngramContext + " -> " + word1.mWord); return; } if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) { diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java index 616209682..766627334 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java @@ -21,8 +21,8 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import com.android.inputmethod.latin.ExpandableBinaryDictionary; -import com.android.inputmethod.latin.PrevWordsInfo; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.NgramContext; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; import com.android.inputmethod.latin.utils.DistracterFilter; import com.android.inputmethod.latin.utils.FileUtils; @@ -159,13 +159,13 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { return new ArrayList<>(wordSet); } - private static void addToDict(final UserHistoryDictionary dict, final List<String> words) { - PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + private static void addToDict(final UserHistoryDictionary dict, final List<String> words, + final int timestamp) { + NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; for (String word : words) { - UserHistoryDictionary.addToDictionary(dict, prevWordsInfo, word, true, - (int)TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()), + UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp, DistracterFilter.EMPTY_DISTRACTER_FILTER); - prevWordsInfo = prevWordsInfo.getNextPrevWordsInfo(new WordInfo(word)); + ngramContext = ngramContext.getNextNgramContext(new WordInfo(word)); } } @@ -177,7 +177,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { final int numberOfWords, final Random random, final boolean checkContents) { final List<String> words = generateWords(numberOfWords, random); // Add random words to the user history dictionary. - addToDict(dict, words); + addToDict(dict, words, mCurrentTime); if (checkContents) { dict.waitAllTasksForTests(); for (int i = 0; i < numberOfWords; ++i) { @@ -288,11 +288,11 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { clearHistory(dict); final List<String> words = generateWords(numberOfWords, random); dict.waitAllTasksForTests(); - PrevWordsInfo prevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; for (final String word : words) { - UserHistoryDictionary.addToDictionary(dict, prevWordsInfo, word, true, mCurrentTime, + UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, mCurrentTime, DistracterFilter.EMPTY_DISTRACTER_FILTER); - prevWordsInfo = prevWordsInfo.getNextPrevWordsInfo(new WordInfo(word)); + ngramContext = ngramContext.getNextNgramContext(new WordInfo(word)); dict.waitAllTasksForTests(); assertTrue(dict.isInDictionary(word)); } @@ -308,6 +308,5 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { for (final String word : words) { assertFalse(dict.isInDictionary(word)); } - stopTestModeInNativeCode(); } } diff --git a/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java index 6ed912088..8360d53fb 100644 --- a/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java +++ b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java @@ -24,7 +24,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.view.inputmethod.InputMethodSubtype; -import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.RichInputMethodManager; import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; @@ -58,7 +58,7 @@ public class DistracterFilterTest extends AndroidTestCase { } public void testIsDistracterToWordsInDictionaries() { - final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + final NgramContext EMPTY_PREV_WORDS_INFO = NgramContext.EMPTY_PREV_WORDS_INFO; final Locale localeEnUs = new Locale("en", "US"); String typedWord; @@ -204,7 +204,7 @@ public class DistracterFilterTest extends AndroidTestCase { public void testGetWordHandlingType() { final Locale localeEnUs = new Locale("en", "US"); - final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + final NgramContext EMPTY_PREV_WORDS_INFO = NgramContext.EMPTY_PREV_WORDS_INFO; int handlingType = 0; handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index 3e3d419e6..7f34ccf20 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -50,7 +50,7 @@ LATINIME_SRC_FILES_FOR_DICTTOOL := \ latin/Dictionary.java \ latin/InputPointers.java \ latin/LastComposedWord.java \ - latin/PrevWordsInfo.java \ + latin/NgramContext.java \ latin/SuggestedWords.java \ latin/WordComposer.java \ latin/settings/NativeSuggestOptions.java \ diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 94d1ae8bb..c6818ce0c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command { hasDifferences = true; } hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, - "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams()); hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, "Shortcut", word0Property.mShortcutTargets, word1PtNode.getShortcutTargets()); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 9b2567fd3..2850e1ff6 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -45,8 +45,8 @@ public class Info extends Dicttool.Command { int whitelistCount = 0; for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != wordProperty.mBigrams) { - bigramCount += wordProperty.mBigrams.size(); + if (wordProperty.mHasNgrams) { + bigramCount += wordProperty.mNgrams.size(); } if (null != wordProperty.mShortcutTargets) { shortcutCount += wordProperty.mShortcutTargets.size(); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index bdec44761..cd3ce70eb 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -353,7 +353,7 @@ public class XmlDictInputOutput { + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">"); - if (null != wordProperty.mShortcutTargets) { + if (wordProperty.mHasShortcuts) { destination.write("\n"); for (WeightedString target : wordProperty.mShortcutTargets) { destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" @@ -362,9 +362,9 @@ public class XmlDictInputOutput { } destination.write(" "); } - if (null != wordProperty.mBigrams) { + if (wordProperty.mHasNgrams) { destination.write("\n"); - for (WeightedString bigram : wordProperty.mBigrams) { + for (WeightedString bigram : wordProperty.getBigrams()) { destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" + bigram.getProbability() + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n"); |