diff options
18 files changed, 438 insertions, 250 deletions
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 2e5bb19d6..6445b61ca 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1779,9 +1779,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mInputUpdater.onStartBatchInput(); mHandler.cancelUpdateSuggestionStrip(); mConnection.beginBatchEdit(); - final SettingsValues settingsValues = mSettings.getCurrent(); + final SettingsValues currentSettingsValues = mSettings.getCurrent(); if (mWordComposer.isComposingWord()) { - if (settingsValues.mIsInternal) { + if (currentSettingsValues.mIsInternal) { if (mWordComposer.isBatchMode()) { LatinImeLoggerUtils.onAutoCorrection( "", mWordComposer.getTypedWord(), " ", mWordComposer); @@ -1808,12 +1808,14 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (Character.isLetterOrDigit(codePointBeforeCursor) - || settingsValues.isUsuallyFollowedBySpace(codePointBeforeCursor)) { + || currentSettingsValues.isUsuallyFollowedBySpace(codePointBeforeCursor)) { mSpaceState = SPACE_STATE_PHANTOM; } mConnection.endBatchEdit(); mKeyboardSwitcher.updateShiftState(); - mWordComposer.setCapitalizedModeAtStartComposingTime(getActualCapsMode()); + mWordComposer.setCapitalizedModeAndPreviousWordAtStartComposingTime(getActualCapsMode(), + // Prev word is 1st word before cursor + getNthPreviousWordForSuggestion(currentSettingsValues, 1 /* nthPreviousWord */)); } static final class InputUpdater implements Handler.Callback { @@ -1986,7 +1988,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mConnection.commitText(commitParts[0], 0); mSpaceState = SPACE_STATE_PHANTOM; mKeyboardSwitcher.updateShiftState(); - mWordComposer.setCapitalizedModeAtStartComposingTime(getActualCapsMode()); + mWordComposer.setCapitalizedModeAndPreviousWordAtStartComposingTime( + getActualCapsMode(), commitParts[0]); ++mAutoCommitSequenceNumber; } } @@ -2295,7 +2298,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mWordComposer.add(primaryCode, keyX, keyY); // If it's the first letter, make note of auto-caps state if (mWordComposer.size() == 1) { - mWordComposer.setCapitalizedModeAtStartComposingTime(getActualCapsMode()); + // We pass 1 to getPreviousWordForSuggestion because we were not composing a word + // yet, so the word we want is the 1st word before the cursor. + mWordComposer.setCapitalizedModeAndPreviousWordAtStartComposingTime( + getActualCapsMode(), + getNthPreviousWordForSuggestion(currentSettings, 1 /* nthPreviousWord */)); } mConnection.setComposingText(getTextWithUnderline(mWordComposer.getTypedWord()), 1); } else { @@ -2537,12 +2544,18 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } } - private String getPreviousWordForSuggestion(final SettingsValues currentSettings) { + /** + * Get the nth previous word before the cursor as context for the suggestion process. + * @param currentSettings the current settings values. + * @param nthPreviousWord reverse index of the word to get (1-indexed) + * @return the nth previous word before the cursor. + */ + private String getNthPreviousWordForSuggestion(final SettingsValues currentSettings, + final int nthPreviousWord) { if (currentSettings.mCurrentLanguageHasSpaces) { // If we are typing in a language with spaces we can just look up the previous // word from textview. - return mConnection.getNthPreviousWord(currentSettings.mWordSeparators, - mWordComposer.isComposingWord() ? 2 : 1); + return mConnection.getNthPreviousWord(currentSettings, nthPreviousWord); } else { return LastComposedWord.NOT_A_COMPOSED_WORD == mLastComposedWord ? null : mLastComposedWord.mCommittedWord; @@ -2562,8 +2575,31 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // should just skip whitespace if any, so 1. final SettingsValues currentSettings = mSettings.getCurrent(); final int[] additionalFeaturesOptions = currentSettings.mAdditionalFeaturesSettingValues; - final String prevWord = getPreviousWordForSuggestion(currentSettings); - suggest.getSuggestedWords(mWordComposer, prevWord, keyboard.getProximityInfo(), + + final String previousWord; + if (mWordComposer.isComposingWord() || mWordComposer.isBatchMode()) { + previousWord = mWordComposer.getPreviousWord(); + } else { + // Not composing: this is for prediction. + // TODO: read the previous word earlier for prediction, like we are doing for + // normal suggestions. + previousWord = getNthPreviousWordForSuggestion(currentSettings, 1 /* nthPreviousWord*/); + } + if (DEBUG) { + // TODO: this is for checking consistency with older versions. Remove this when + // we are confident this is stable. + // We're checking the previous word in the text field against the memorized previous + // word. If we are composing a word we should have the second word before the cursor + // memorized, otherwise we should have the first. + final String rereadPrevWord = getNthPreviousWordForSuggestion(currentSettings, + mWordComposer.isComposingWord() ? 2 : 1); + if (!TextUtils.equals(previousWord, rereadPrevWord)) { + throw new RuntimeException("Unexpected previous word: " + + previousWord + " <> " + rereadPrevWord); + } + } + suggest.getSuggestedWords(mWordComposer, mWordComposer.getPreviousWord(), + keyboard.getProximityInfo(), currentSettings.mBlockPotentiallyOffensive, currentSettings.mCorrectionEnabled, additionalFeaturesOptions, sessionId, sequenceNumber, callback); } @@ -2832,7 +2868,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final UserHistoryDictionary userHistoryDictionary = mUserHistoryDictionary; if (userHistoryDictionary == null) return null; - final String prevWord = mConnection.getNthPreviousWord(currentSettings.mWordSeparators, 2); + final String prevWord = mConnection.getNthPreviousWord(currentSettings, 2); final String secondWord; if (mWordComposer.wasAutoCapitalized() && !mWordComposer.isMostlyCaps()) { secondWord = suggestion.toLowerCase(mSubtypeSwitcher.getCurrentSubtypeLocale()); @@ -2900,7 +2936,13 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } } } - mWordComposer.setComposingWord(typedWord, mKeyboardSwitcher.getKeyboard()); + mWordComposer.setComposingWord(typedWord, + getNthPreviousWordForSuggestion(currentSettings, + // We want the previous word for suggestion. If we have chars in the word + // before the cursor, then we want the word before that, hence 2; otherwise, + // we want the word immediately before the cursor, hence 1. + 0 == numberOfCharsInWordBeforeCursor ? 1 : 2), + mKeyboardSwitcher.getKeyboard()); mWordComposer.setCursorPositionWithinWord( typedWord.codePointCount(0, numberOfCharsInWordBeforeCursor)); mConnection.setComposingRegion( @@ -2978,7 +3020,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } private void restartSuggestionsOnWordBeforeCursor(final String word) { - mWordComposer.setComposingWord(word, mKeyboardSwitcher.getKeyboard()); + mWordComposer.setComposingWord(word, + // Previous word is the 2nd word before cursor because we are restarting on the + // 1st word before cursor. + getNthPreviousWordForSuggestion(mSettings.getCurrent(), 2 /* nthPreviousWord */), + mKeyboardSwitcher.getKeyboard()); final int length = word.length(); mConnection.deleteSurroundingText(length, 0); mConnection.setComposingText(word, 1); @@ -3044,7 +3090,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } else { // For languages without spaces, we revert the typed string but the cursor is flush // with the typed word, so we need to resume suggestions right away. - mWordComposer.setComposingWord(stringToCommit, mKeyboardSwitcher.getKeyboard()); + mWordComposer.setComposingWord(stringToCommit, previousWord, + mKeyboardSwitcher.getKeyboard()); mConnection.setComposingText(stringToCommit, 1); } if (mSettings.isInternal()) { diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index b5ea0de01..37311acf2 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -538,7 +538,7 @@ public final class RichInputConnection { } @SuppressWarnings("unused") - public String getNthPreviousWord(final String sentenceSeperators, final int n) { + public String getNthPreviousWord(final SettingsValues currentSettingsValues, final int n) { mIC = mParent.getCurrentInputConnection(); if (null == mIC) return null; final CharSequence prev = getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0); @@ -557,7 +557,7 @@ public final class RichInputConnection { } } } - return getNthPreviousWord(prev, sentenceSeperators, n); + return getNthPreviousWord(prev, currentSettingsValues, n); } private static boolean isSeparator(int code, String sep) { @@ -581,7 +581,7 @@ public final class RichInputConnection { // (n = 2) "abc |" -> null // (n = 2) "abc. def|" -> null public static String getNthPreviousWord(final CharSequence prev, - final String sentenceSeperators, final int n) { + final SettingsValues currentSettingsValues, final int n) { if (prev == null) return null; final String[] w = spaceRegex.split(prev); @@ -593,7 +593,8 @@ public final class RichInputConnection { // If ends in a separator, return null final char lastChar = nthPrevWord.charAt(length - 1); - if (sentenceSeperators.contains(String.valueOf(lastChar))) return null; + if (currentSettingsValues.isWordSeparator(lastChar) + || currentSettingsValues.isWordConnector(lastChar)) return null; return nthPrevWord; } diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index 039dadc66..2f81d15d5 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -48,6 +48,10 @@ public final class WordComposer { // at any given time. However this is not limited in size, while mPrimaryKeyCodes is limited // to MAX_WORD_LENGTH code points. private final StringBuilder mTypedWord; + // The previous word (before the composing word). Used as context for suggestions. May be null + // after resetting and before starting a new composing word, or when there is no context like + // at the start of text for example. + private String mPreviousWord; private String mAutoCorrection; private boolean mIsResumed; private boolean mIsBatchMode; @@ -85,6 +89,7 @@ public final class WordComposer { mIsBatchMode = false; mCursorPositionWithinWord = 0; mRejectedBatchModeSuggestion = null; + mPreviousWord = null; refreshSize(); } @@ -101,6 +106,7 @@ public final class WordComposer { mIsBatchMode = source.mIsBatchMode; mCursorPositionWithinWord = source.mCursorPositionWithinWord; mRejectedBatchModeSuggestion = source.mRejectedBatchModeSuggestion; + mPreviousWord = source.mPreviousWord; refreshSize(); } @@ -118,6 +124,7 @@ public final class WordComposer { mIsBatchMode = false; mCursorPositionWithinWord = 0; mRejectedBatchModeSuggestion = null; + mPreviousWord = null; refreshSize(); } @@ -284,8 +291,13 @@ public final class WordComposer { /** * Set the currently composing word to the one passed as an argument. * This will register NOT_A_COORDINATE for X and Ys, and use the passed keyboard for proximity. + * @param word the char sequence to set as the composing word. + * @param previousWord the previous word, to use as context for suggestions. Can be null if + * the context is nil (typically, at start of text). + * @param keyboard the keyboard this is typed on, for coordinate info/proximity. */ - public void setComposingWord(final CharSequence word, final Keyboard keyboard) { + public void setComposingWord(final CharSequence word, final String previousWord, + final Keyboard keyboard) { reset(); final int length = word.length(); for (int i = 0; i < length; i = Character.offsetByCodePoints(word, i, 1)) { @@ -293,6 +305,7 @@ public final class WordComposer { addKeyInfo(codePoint, keyboard); } mIsResumed = true; + mPreviousWord = previousWord; } /** @@ -343,6 +356,10 @@ public final class WordComposer { return mTypedWord.toString(); } + public String getPreviousWord() { + return mPreviousWord; + } + /** * Whether or not the user typed a capital letter as the first letter in the word * @return capitalization preference @@ -388,18 +405,21 @@ public final class WordComposer { } /** - * Saves the caps mode at the start of composing. + * Saves the caps mode and the previous word at the start of composing. * - * WordComposer needs to know about this for several reasons. The first is, we need to know - * after the fact what the reason was, to register the correct form into the user history - * dictionary: if the word was automatically capitalized, we should insert it in all-lower - * case but if it's a manual pressing of shift, then it should be inserted as is. + * WordComposer needs to know about the caps mode for several reasons. The first is, we need + * to know after the fact what the reason was, to register the correct form into the user + * history dictionary: if the word was automatically capitalized, we should insert it in + * all-lower case but if it's a manual pressing of shift, then it should be inserted as is. * Also, batch input needs to know about the current caps mode to display correctly * capitalized suggestions. * @param mode the mode at the time of start + * @param previousWord the previous word as context for suggestions. May be null if none. */ - public void setCapitalizedModeAtStartComposingTime(final int mode) { + public void setCapitalizedModeAndPreviousWordAtStartComposingTime(final int mode, + final String previousWord) { mCapitalizedMode = mode; + mPreviousWord = previousWord; } /** @@ -451,6 +471,7 @@ public final class WordComposer { mCapsCount = 0; mDigitsCount = 0; mIsBatchMode = false; + mPreviousWord = mTypedWord.toString(); mTypedWord.setLength(0); mCodePointSize = 0; mTrailingSingleQuotesCount = 0; @@ -464,7 +485,8 @@ public final class WordComposer { return lastComposedWord; } - public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord) { + public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord, + final String previousWord) { mPrimaryKeyCodes = lastComposedWord.mPrimaryKeyCodes; mInputPointers.set(lastComposedWord.mInputPointers); mTypedWord.setLength(0); @@ -475,6 +497,7 @@ public final class WordComposer { mCursorPositionWithinWord = mCodePointSize; mRejectedBatchModeSuggestion = null; mIsResumed = true; + mPreviousWord = previousWord; } public boolean isBatchMode() { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index 609b8ab07..9af5a3730 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -57,6 +57,9 @@ class PtNodeWriter { virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) = 0; + virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos) = 0; + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram) = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp index 36b2049fe..aa200b26d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -105,9 +106,11 @@ bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability( if (!toBeUpdatedPtNodeParams->isTerminal()) { return false; } + const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(), + newProbability); int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos(); return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - newProbability, &probabilityFieldPos); + probabilityToWrite, &probabilityFieldPos); } bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition( @@ -119,67 +122,24 @@ bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition( bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { - const int nodePos = *ptNodeWritingPos; - // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the - // PtNode writing. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, - 0 /* nodeFlags */, ptNodeWritingPos)) { - return false; - } - // Calculate a parent offset and write the offset. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer, - ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { - return false; - } - // Write code points - if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, - ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, + 0 /* outProbabilityFieldPos */, ptNodeWritingPos); +} + +bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { + int probabilityFieldPos = NOT_A_DICT_POS; + if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos, + ptNodeWritingPos)) { return false; } - // Write probability when the probability is a valid probability, which means this node is - // terminal. - if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { - if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - ptNodeParams->getProbability(), ptNodeWritingPos)) { - return false; - } - } - // Write children position - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, - ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + if (probabilityFieldPos == NOT_A_DICT_POS) { return false; } - // Copy shortcut list when the originalShortcutListPos is valid dictionary position. - if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) { - int fromPos = ptNodeParams->getShortcutPos(); - if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos, - ptNodeWritingPos)) { - return false; - } - } - // Copy bigram list when the originalBigramListPos is valid dictionary position. - int bigramCount = 0; - if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) { - int fromPos = ptNodeParams->getBigramsPos(); - if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) { - return false; - } - } - // Create node flags and write them. - PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), - ptNodeParams->isNotAWord(), - ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */, - ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, - bigramCount > 0 /* hasBigrams */, - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - int flagsFieldPos = nodePos; - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, - &flagsFieldPos)) { - return false; - } - return true; + const int probabilityToWrite = getUpdatedProbability( + NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability()); + return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + probabilityToWrite, &probabilityFieldPos); } bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry( @@ -289,4 +249,90 @@ bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields( return true; } +bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos, + int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + // Write probability when the probability is a valid probability, which means this node is + // terminal. + if (ptNodeParams->isTerminal()) { + if (outProbabilityFieldPos) { + *outProbabilityFieldPos = *ptNodeWritingPos; + } + if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) { + // Write a dummy probability. + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + 0 /* probability */, ptNodeWritingPos)) { + return false; + } + } else { + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + ptNodeParams->getProbability(), ptNodeWritingPos)) { + return false; + } + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + // Copy shortcut list when the originalShortcutListPos is valid dictionary position. + if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getShortcutPos(); + if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos, + ptNodeWritingPos)) { + return false; + } + } + // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; + if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getBigramsPos(); + if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) { + return false; + } + } + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), + ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability, + const int newProbability) const { + if (mNeedsToDecayWhenUpdating) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); + } else { + return newProbability; + } +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h index e1d08fb74..20b0ca428 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h @@ -39,9 +39,10 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer, const DynamicPatriciaTrieNodeReader *const ptNodeReader, DynamicBigramListPolicy *const bigramPolicy, - DynamicShortcutListPolicy *const shortcutPolicy) + DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating) : mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {} virtual ~DynamicPatriciaTrieNodeWriter() {} @@ -59,6 +60,9 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos); + virtual bool writeNewTerminalPtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos); + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram); @@ -76,6 +80,12 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter); + bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos, + int *const ptNodeWritingPos); + + int getUpdatedProbability(const int originalProbability, const int newProbability) const; + static const int CHILDREN_POSITION_FIELD_SIZE; BufferWithExtendableBuffer *const mBuffer; @@ -83,7 +93,7 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { DynamicPatriciaTrieReadingHelper mReadingHelper; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; - + const bool mNeedsToDecayWhenUpdating; }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index d4fb937d6..e80c7588d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -231,8 +231,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary."); return; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, - &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -246,8 +246,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, needsToDecay); - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, - &mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); mNeedsToDecayForTesting = false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 636c9bf5d..fec9efce0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -49,9 +49,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mHeaderPolicy.isDecayingDict()), mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy, - &mShortcutListPolicy), - mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter, - mHeaderPolicy.isDecayingDict()), + &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), + mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp index 39e1ecaaa..e70c0eca4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp @@ -22,7 +22,6 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -53,9 +52,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord( if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, - probability), + return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } @@ -66,8 +63,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord( } if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } @@ -83,8 +79,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord( *outAddedNewUnigram = true; return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), - codePointCount - readingHelper->getPrevTotalCodePointCount(), - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); + codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos); } bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, @@ -124,19 +119,18 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability( if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; - const int probabilityToWrite = getUpdatedProbability( - originalPtNodeParams->getProbability(), probability); - return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite); + return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability); } else { // Make the node terminal and write the probability. *outAddedNewUnigram = true; const int movedPos = mBuffer->getTailPosition(); int writingPos = movedPos; const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, - originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), - originalPtNodeParams->getCodePoints(), - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability))); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + true /* isTerminal */, originalPtNodeParams->getParentPos(), + originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), + probability)); + if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, + &writingPos)) { return false; } if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { @@ -165,9 +159,10 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( 1 /* arraySize */, &writingPos)) { return false; } - const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */, parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, + &writingPos)) { return false; } if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, @@ -194,12 +189,21 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( int writingPos = firstPartOfReallocatedPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. - const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; - const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, - reallocatingPtNodeParams->getCodePoints(), newProbability)); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { - return false; + if (addsExtraChild) { + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */, + reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, + reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + return false; + } + } else { + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */, + reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, + reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); + if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, + &writingPos)) { + return false; + } } const int actualChildrenPos = writingPos; // Create new children PtNode array. @@ -211,7 +215,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( // Write the 2nd part of the reallocating node. const int secondPartOfReallocatedPtNodePos = writingPos; const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, - firstPartOfReallocatedPtNodePos, + reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, reallocatingPtNodeParams->getProbability())); @@ -219,10 +223,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( return false; } if (addsExtraChild) { - const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( + const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) { + if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, + &writingPos)) { return false; } } @@ -242,22 +247,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); } -int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability, - const int newProbability) const { - if (mNeedsToDecay) { - return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, - newProbability); - } else { - return newProbability; - } -} - const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( - const PtNodeParams *const originalPtNodeParams, const int parentPos, + const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( - originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), - probability != NOT_A_PROBABILITY /* isTerminal */, + originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal, originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, @@ -265,11 +259,10 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( } const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode( - const int parentPos, const int codePointCount, const int *const codePoints, - const int probability) const { + const bool isTerminal, const int parentPos, const int codePointCount, + const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( - false /* isBlacklisted */, false /* isNotAWord */, - probability != NOT_A_PROBABILITY /* isTerminal */, + false /* isBlacklisted */, false /* isNotAWord */, isTerminal, false /* hasShortcutTargets */, false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h index b9800cd80..cc51d4bd1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h @@ -34,10 +34,8 @@ class PtNodeWriter; class DynamicPatriciaTrieUpdatingHelper { public: DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, - const bool needsToDecay) - : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), - mNeedsToDecay(needsToDecay) {} + const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter) + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {} ~DynamicPatriciaTrieUpdatingHelper() {} @@ -61,7 +59,6 @@ class DynamicPatriciaTrieUpdatingHelper { BufferWithExtendableBuffer *const mBuffer; const PtNodeReader *const mPtNodeReader; PtNodeWriter *const mPtNodeWriter; - const bool mNeedsToDecay; bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); @@ -80,14 +77,12 @@ class DynamicPatriciaTrieUpdatingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - int getUpdatedProbability(const int originalProbability, const int newProbability) const; - const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, - const int parentPos, const int codePointCount, const int *const codePoints, - const int probability) const; - - const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount, + const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const; + + const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) const; }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index fe5e2c626..321189d41 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -76,11 +76,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int *const outUnigramCount, int *const outBigramCount) { DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader); + DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy, + mShortcutPolicy, false /* needsToDecayWhenUpdating */); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay); + headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; @@ -92,7 +94,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability - traversePolicyToUpdateBigramProbability(mPtNodeWriter); + traversePolicyToUpdateBigramProbability(&ptNodeWriter); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateBigramProbability)) { return false; @@ -106,7 +108,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy, - mShortcutPolicy); + mShortcutPolicy, false /* needsToDecayWhenUpdating */); DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite, &dictPositionRelocationMap); @@ -124,7 +126,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, &newDictShortcutPolicy); DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader, - &newDictBigramPolicy, &newDictShortcutPolicy); + &newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index e343bf9e5..6d722e5f3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h @@ -29,8 +29,6 @@ class DynamicBigramListPolicy; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; class HeaderPolicy; -class PtNodeReader; -class PtNodeWriter; // TODO: Make it independent from a particular format and move to pt_common. class DynamicPatriciaTrieWritingHelper { @@ -38,11 +36,9 @@ class DynamicPatriciaTrieWritingHelper { static const size_t MAX_DICTIONARY_SIZE; DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, DynamicBigramListPolicy *const bigramPolicy, DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) - : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), mNeedsToDecay(needsToDecay) {} ~DynamicPatriciaTrieWritingHelper() {} @@ -57,8 +53,6 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); BufferWithExtendableBuffer *const mBuffer; - const PtNodeReader *const mPtNodeReader; - PtNodeWriter *const mPtNodeWriter; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; const bool mNeedsToDecay; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index e8a3142b8..277545798 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -115,8 +116,10 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( if (!toBeUpdatedPtNodeParams->isTerminal()) { return false; } + const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(), + newProbability); return mBuffers->getUpdatableProbabilityDictContent()->setProbability( - toBeUpdatedPtNodeParams->getTerminalId(), newProbability); + toBeUpdatedPtNodeParams->getTerminalId(), probabilityToWrite); } bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition( @@ -134,67 +137,23 @@ bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBe bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { - const int nodePos = *ptNodeWritingPos; - // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the - // PtNode writing. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, - 0 /* nodeFlags */, ptNodeWritingPos)) { - return false; - } - // Calculate a parent offset and write the offset. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, - ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { - return false; - } - // Write code points - if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer, - ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { - return false; - } + return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */, + ptNodeWritingPos); +} + + +bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { - terminalId = ptNodeParams->getTerminalId(); - } else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { - // Write terminal information using a new terminal id. - // Get a new unused terminal id. - terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId(); - } - const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; - if (isTerminal) { - // Update the lookup table. - if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition( - terminalId, nodePos)) { - return false; - } - // Write terminal Id. - if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId, - Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) { - return false; - } - // Write probability. - if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability( - terminalId, ptNodeParams->getProbability())) { - return false; - } - } - // Write children position - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, - ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, + ptNodeWritingPos)) { return false; } - // Create node flags and write them. - PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), - ptNodeParams->isNotAWord(), isTerminal, - ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - int flagsFieldPos = nodePos; - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags, - &flagsFieldPos)) { - return false; - } - return true; + // Write probability. + const int probabilityToWrite = getUpdatedProbability(NOT_A_PROBABILITY, + ptNodeParams->getProbability()); + return mBuffers->getUpdatableProbabilityDictContent()->setProbability(terminalId, + probabilityToWrite); } bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( @@ -258,4 +217,85 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields( return true; } +bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outTerminalId, + int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { + terminalId = ptNodeParams->getTerminalId(); + } else if (ptNodeParams->isTerminal()) { + // Write terminal information using a new terminal id. + // Get a new unused terminal id. + terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId(); + } + const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; + if (isTerminal) { + // Update the lookup table. + if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition( + terminalId, nodePos)) { + return false; + } + // Write terminal Id. + if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId, + Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) { + return false; + } + // Write probability. + if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { + if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability( + terminalId, ptNodeParams->getProbability())) { + return false; + } + } + if (outTerminalId) { + *outTerminalId = terminalId; + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), isTerminal, + ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +int Ver4PatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability, + const int newProbability) const { + if (mNeedsToDecayWhenUpdating) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); + } else { + return newProbability; + } +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 12451525f..2d836358d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -39,10 +39,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { public: Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) + Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy, + const bool needsToDecayWhenUpdating) : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), mReadingHelper(mTrieBuffer, mPtNodeReader), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {} virtual ~Ver4PatriciaTrieNodeWriter() {} @@ -63,6 +65,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos); + virtual bool writeNewTerminalPtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos); + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram); @@ -80,6 +85,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { private: DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); + bool writePtNodeAndGetTerminalIdAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outTerminalId, + int *const ptNodeWritingPos); + + int getUpdatedProbability(const int originalProbability, const int newProbability) const; + static const int CHILDREN_POSITION_FIELD_SIZE; BufferWithExtendableBuffer *const mTrieBuffer; @@ -88,6 +99,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { DynamicPatriciaTrieReadingHelper mReadingHelper; Ver4BigramListPolicy *const mBigramPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy; + const bool mNeedsToDecayWhenUpdating; }; } // namespace latinime #endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 3606a2ae9..f2ce52941 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -49,9 +49,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, - &mShortcutPolicy), - mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter, - mHeaderPolicy.isDecayingDict()), + &mShortcutPolicy, mHeaderPolicy.isDecayingDict()), + mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index ea03c72fa..a7b729b94 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -87,7 +87,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), - mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy, + false /* needsToDecayWhenUpdating */); DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); @@ -121,7 +122,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy, + false /* needsToDecayWhenUpdating */); DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); @@ -139,7 +141,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); + buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy, + false /* needsToDecayWhenUpdating */); DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), &newPtNodeReader); diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java index c0dd9933c..6ad125053 100644 --- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java @@ -16,6 +16,7 @@ package com.android.inputmethod.latin; +import com.android.inputmethod.latin.settings.SettingsValues; import com.android.inputmethod.latin.utils.TextRange; import android.inputmethodservice.InputMethodService; @@ -39,7 +40,8 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { // The following is meant to be a reasonable default for // the "word_separators" resource. - private static final String sSeparators = ".,:;!?-"; + private static final SettingsValues sSettings = + SettingsValues.makeDummySettingsValuesForTest(Locale.ENGLISH); @Override protected void setUp() throws Exception { @@ -137,9 +139,9 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { */ public void testGetPreviousWord() { // If one of the following cases breaks, the bigram suggestions won't work. - assertEquals(RichInputConnection.getNthPreviousWord("abc def", sSeparators, 2), "abc"); - assertNull(RichInputConnection.getNthPreviousWord("abc", sSeparators, 2)); - assertNull(RichInputConnection.getNthPreviousWord("abc. def", sSeparators, 2)); + assertEquals(RichInputConnection.getNthPreviousWord("abc def", sSettings, 2), "abc"); + assertNull(RichInputConnection.getNthPreviousWord("abc", sSettings, 2)); + assertNull(RichInputConnection.getNthPreviousWord("abc. def", sSettings, 2)); // The following tests reflect the current behavior of the function // RichInputConnection#getNthPreviousWord. @@ -148,15 +150,15 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { // this function if needed - especially since it does not seem very // logical. These tests are just there to catch any unintentional // changes in the behavior of the RichInputConnection#getPreviousWord method. - assertEquals(RichInputConnection.getNthPreviousWord("abc def ", sSeparators, 2), "abc"); - assertEquals(RichInputConnection.getNthPreviousWord("abc def.", sSeparators, 2), "abc"); - assertEquals(RichInputConnection.getNthPreviousWord("abc def .", sSeparators, 2), "def"); - assertNull(RichInputConnection.getNthPreviousWord("abc ", sSeparators, 2)); - - assertEquals(RichInputConnection.getNthPreviousWord("abc def", sSeparators, 1), "def"); - assertEquals(RichInputConnection.getNthPreviousWord("abc def ", sSeparators, 1), "def"); - assertNull(RichInputConnection.getNthPreviousWord("abc def.", sSeparators, 1)); - assertNull(RichInputConnection.getNthPreviousWord("abc def .", sSeparators, 1)); + assertEquals(RichInputConnection.getNthPreviousWord("abc def ", sSettings, 2), "abc"); + assertEquals(RichInputConnection.getNthPreviousWord("abc def.", sSettings, 2), "abc"); + assertEquals(RichInputConnection.getNthPreviousWord("abc def .", sSettings, 2), "def"); + assertNull(RichInputConnection.getNthPreviousWord("abc ", sSettings, 2)); + + assertEquals(RichInputConnection.getNthPreviousWord("abc def", sSettings, 1), "def"); + assertEquals(RichInputConnection.getNthPreviousWord("abc def ", sSettings, 1), "def"); + assertNull(RichInputConnection.getNthPreviousWord("abc def.", sSettings, 1)); + assertNull(RichInputConnection.getNthPreviousWord("abc def .", sSettings, 1)); } /** diff --git a/tests/src/com/android/inputmethod/latin/WordComposerTests.java b/tests/src/com/android/inputmethod/latin/WordComposerTests.java index 1434c6b63..6d103efea 100644 --- a/tests/src/com/android/inputmethod/latin/WordComposerTests.java +++ b/tests/src/com/android/inputmethod/latin/WordComposerTests.java @@ -26,8 +26,15 @@ import android.test.suitebuilder.annotation.SmallTest; public class WordComposerTests extends AndroidTestCase { public void testMoveCursor() { final WordComposer wc = new WordComposer(); + // BMP is the Basic Multilingual Plane, as defined by Unicode. This includes + // most characters for most scripts, including all Roman alphabet languages, + // CJK, Arabic, Hebrew. Notable exceptions include some emoji and some + // very rare Chinese ideograms. BMP characters can be encoded on 2 bytes + // in UTF-16, whereas those outside the BMP need 4 bytes. + // http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane final String STR_WITHIN_BMP = "abcdef"; - wc.setComposingWord(STR_WITHIN_BMP, null); + final String PREVWORD = "prevword"; + wc.setComposingWord(STR_WITHIN_BMP, PREVWORD, null); assertEquals(wc.size(), STR_WITHIN_BMP.codePointCount(0, STR_WITHIN_BMP.length())); assertFalse(wc.isCursorFrontOrMiddleOfComposingWord()); @@ -43,13 +50,19 @@ public class WordComposerTests extends AndroidTestCase { // Move the cursor to after the 'f' assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(1)); assertFalse(wc.isCursorFrontOrMiddleOfComposingWord()); + // Check the previous word is still there + assertEquals(PREVWORD, wc.getPreviousWord()); // Move the cursor past the end of the word assertFalse(wc.moveCursorByAndReturnIfInsideComposingWord(1)); assertFalse(wc.moveCursorByAndReturnIfInsideComposingWord(15)); + // Do what LatinIME does when the cursor is moved outside of the word, + // and check the behavior is correct. + wc.reset(); + assertNull(wc.getPreviousWord()); // \uD861\uDED7 is 𨛗, a character outside the BMP final String STR_WITH_SUPPLEMENTARY_CHAR = "abcde\uD861\uDED7fgh"; - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null, null); assertEquals(wc.size(), STR_WITH_SUPPLEMENTARY_CHAR.codePointCount(0, STR_WITH_SUPPLEMENTARY_CHAR.length())); assertFalse(wc.isCursorFrontOrMiddleOfComposingWord()); @@ -59,34 +72,40 @@ public class WordComposerTests extends AndroidTestCase { assertTrue(wc.isCursorFrontOrMiddleOfComposingWord()); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(1)); assertFalse(wc.isCursorFrontOrMiddleOfComposingWord()); + assertNull(wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, STR_WITHIN_BMP, null); wc.setCursorPositionWithinWord(3); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(7)); + assertEquals(STR_WITHIN_BMP, wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, STR_WITH_SUPPLEMENTARY_CHAR, null); wc.setCursorPositionWithinWord(3); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(7)); + assertEquals(STR_WITH_SUPPLEMENTARY_CHAR, wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, STR_WITHIN_BMP, null); wc.setCursorPositionWithinWord(3); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(-3)); assertFalse(wc.moveCursorByAndReturnIfInsideComposingWord(-1)); + assertEquals(STR_WITHIN_BMP, wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null, null); wc.setCursorPositionWithinWord(3); assertFalse(wc.moveCursorByAndReturnIfInsideComposingWord(-9)); + assertNull(wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, STR_WITH_SUPPLEMENTARY_CHAR, null); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(-10)); + assertEquals(STR_WITH_SUPPLEMENTARY_CHAR, wc.getPreviousWord()); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null, null); assertFalse(wc.moveCursorByAndReturnIfInsideComposingWord(-11)); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null, null); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(0)); - wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null); + wc.setComposingWord(STR_WITH_SUPPLEMENTARY_CHAR, null, null); wc.setCursorPositionWithinWord(2); assertTrue(wc.moveCursorByAndReturnIfInsideComposingWord(0)); } |