diff options
20 files changed, 309 insertions, 68 deletions
diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java index 55df263fe..845a9b987 100644 --- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java @@ -58,7 +58,7 @@ abstract public class AbstractDictionaryWriter extends Dictionary { final File file = new File(mContext.getFilesDir(), fileName); final File tempFile = new File(mContext.getFilesDir(), tempFileName); try { - final DictEncoder dictEncoder = new Ver3DictEncoder(file); + final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile); writeDictionary(dictEncoder); tempFile.renameTo(file); } catch (IOException e) { diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index b49cd80ab..632ee0da4 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -109,7 +109,7 @@ public final class BinaryDictionary extends Dictionary { private static native void flushWithGCNative(long dict, String filePath); private static native void closeNative(long dict); private static native int getProbabilityNative(long dict, int[] word); - private static native boolean isValidBigramNative(long dict, int[] word0, int[] word1); + private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); private static native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, @@ -122,6 +122,8 @@ public final class BinaryDictionary extends Dictionary { private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, int probability); private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); + private static native int calculateProbabilityNative(long dict, int unigramProbability, + int bigramProbability); // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, @@ -219,12 +221,12 @@ public final class BinaryDictionary extends Dictionary { @Override public boolean isValidWord(final String word) { - return getFrequency(word) >= 0; + return getFrequency(word) != NOT_A_PROBABILITY; } @Override public int getFrequency(final String word) { - if (word == null) return -1; + if (word == null) return NOT_A_PROBABILITY; int[] codePoints = StringUtils.toCodePointArray(word); return getProbabilityNative(mNativeDict, codePoints); } @@ -232,10 +234,14 @@ public final class BinaryDictionary extends Dictionary { // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni // calls when checking for changes in an entire dictionary. public boolean isValidBigram(final String word0, final String word1) { - if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return false; + return getBigramProbability(word0, word1) != NOT_A_PROBABILITY; + } + + public int getBigramProbability(final String word0, final String word1) { + if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY; final int[] codePoints0 = StringUtils.toCodePointArray(word0); final int[] codePoints1 = StringUtils.toCodePointArray(word1); - return isValidBigramNative(mNativeDict, codePoints0, codePoints1); + return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); } // Add a unigram entry to binary dictionary in native code. @@ -285,6 +291,12 @@ public final class BinaryDictionary extends Dictionary { return needsToRunGCNative(mNativeDict); } + @UsedForTesting + public int calculateProbability(final int unigramProbability, final int bigramProbability) { + if (!isValidDictionary()) return NOT_A_PROBABILITY; + return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability); + } + @Override public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { // TODO: actually use the confidence rather than use this completely broken heuristic diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 7815f4d41..1684d47b5 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -460,7 +460,7 @@ public final class Suggest { private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = new SuggestedWordInfoComparator(); - private static SuggestedWordInfo getTransformedSuggestedWordInfo( + /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo( final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); @@ -471,7 +471,12 @@ public final class Suggest { } else { sb.append(wordInfo.mWord); } - for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { + // Appending quotes is here to help people quote words. However, it's not helpful + // when they type words with quotes toward the end like "it's" or "didn't", where + // it's more likely the user missed the last character (or didn't type it yet). + final int quotesToAppend = trailingSingleQuotesCount + - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1); + for (int i = quotesToAppend - 1; i >= 0; --i) { sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); } return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind, diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index a63fab6dc..7f47493b2 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -188,8 +188,8 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, return dictionary->getProbability(codePoints, wordLength); } -static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1) { +static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, + jlong dict, jintArray word0, jintArray word1) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return JNI_FALSE; const jsize word0Length = env->GetArrayLength(word0); @@ -198,7 +198,8 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass claz int word1CodePoints[word1Length]; env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length); + return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints, + word1Length); } static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, @@ -269,6 +270,16 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz word1Length); } +static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, + jlong dict, jint unigramProbability, jint bigramProbability) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return NOT_A_PROBABILITY; + } + return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability, + bigramProbability); +} + static const JNINativeMethod sMethods[] = { { const_cast<char *>("openNative"), @@ -306,9 +317,9 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability) }, { - const_cast<char *>("isValidBigramNative"), - const_cast<char *>("(J[I[I)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram) + const_cast<char *>("getBigramProbabilityNative"), + const_cast<char *>("(J[I[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) }, { const_cast<char *>("calcNormalizedScoreNative"), @@ -334,6 +345,11 @@ static const JNINativeMethod sMethods[] = { const_cast<char *>("removeBigramWordsNative"), const_cast<char *>("(J[I[I)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) + }, + { + const_cast<char *>("calculateProbabilityNative"), + const_cast<char *>("(JII)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) } }; diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 377015371..41ef9d2b2 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -143,7 +143,7 @@ class DicNode { dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), dicNode->mDicNodeProperties.getDepth(), - dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions, + dicNode->mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(), mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -321,8 +321,13 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } - void outputSpacePositionsResult(int *spaceIndices) const { - mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices); + int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { + const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); + if (inputIndex == NOT_AN_INDEX) { + return NOT_AN_INDEX; + } else { + return pInfoState->getInputIndexOfSampledPoint(inputIndex); + } } bool hasMultipleWords() const { @@ -573,7 +578,11 @@ class DicNode { } } - AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) { + AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) { + if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) { + mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex( + inputStateG->mInputIndex); + } mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId, inputStateG->mInputIndex, inputStateG->mPrevCodePoint, inputStateG->mTerminalDiffCost, inputStateG->mRawLength); diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index b7af97018..b8986203d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/layout/proximity_info_state.h" namespace latinime { @@ -29,9 +30,8 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_DICT_POS) { + mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); } virtual ~DicNodeStatePrevWord() {} @@ -42,7 +42,7 @@ class DicNodeStatePrevWord { mPrevWordStart = 0; mPrevWordProbability = -1; mPrevWordNodePos = NOT_A_DICT_POS; - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); + mSecondWordFirstInputIndex = NOT_AN_INDEX; } void init(const int prevWordNodePos) { @@ -51,7 +51,7 @@ class DicNodeStatePrevWord { mPrevWordStart = 0; mPrevWordProbability = -1; mPrevWordNodePos = prevWordNodePos; - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); + mSecondWordFirstInputIndex = NOT_AN_INDEX; } // Init by copy @@ -61,14 +61,14 @@ class DicNodeStatePrevWord { mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; mPrevWordNodePos = prevWord->mPrevWordNodePos; + mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); - memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions)); } void init(const int16_t prevWordCount, const int16_t prevWordProbability, const int prevWordNodePos, const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, const int *const prevSpacePositions, - const int lastInputIndex) { + const int *const src1, const int16_t length1, + const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; mPrevWordNodePos = prevWordNodePos; @@ -80,8 +80,7 @@ class DicNodeStatePrevWord { mPrevWord[twoWordsLen] = KEYCODE_SPACE; mPrevWordStart = length0; mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1); - memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions)); - mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex; + mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex; } void truncate(const int offset) { @@ -96,11 +95,12 @@ class DicNodeStatePrevWord { mPrevWordLength = newPrevWordLength; } - void outputSpacePositions(int *spaceIndices) const { - // Convert uint16_t to int - for (int i = 0; i < MAX_RESULTS; i++) { - spaceIndices[i] = mPrevSpacePositions[i]; - } + void setSecondWordFirstInputIndex(const int inputIndex) { + mSecondWordFirstInputIndex = inputIndex; + } + + int getSecondWordFirstInputIndex() const { + return mSecondWordFirstInputIndex; } // TODO: remove @@ -138,8 +138,6 @@ class DicNodeStatePrevWord { // TODO: Move to private int mPrevWord[MAX_WORD_LENGTH]; - // TODO: Move to private - int mPrevSpacePositions[MAX_RESULTS]; private: // Caution!!! @@ -150,6 +148,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordStart; int16_t mPrevWordProbability; int mPrevWordNodePos; + int mSecondWordFirstInputIndex; }; } // namespace latinime #endif // LATINIME_DIC_NODE_STATE_PREVWORD_H diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 425b07624..5ba71c168 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -150,24 +150,26 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } -bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, +int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1, int length1) const { int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) return false; + if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (NOT_A_DICT_POS == nextWordPos) return false; + if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { - return true; + return mDictionaryStructurePolicy->getProbability( + mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), + bigramsIt.getProbability()); } } - return false; + return NOT_A_PROBABILITY; } // TODO: Move functions related to bigram to here diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 99b964c49..8af7ee75d 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -29,7 +29,7 @@ class BigramDictionary { int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; - bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; + int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); private: diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 033572201..ec1b63a12 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -93,8 +93,9 @@ int Dictionary::getProbability(const int *word, int length) const { return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } -bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { - return mBigramDictionary->isValidBigram(word0, length0, word1, length1); +int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, + int length1) const { + return mBigramDictionary->getBigramProbability(word0, length0, word1, length1); } void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 06e84bbfe..974447468 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -67,7 +67,7 @@ class Dictionary { int getProbability(const int *word, int length) const; - bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const; + int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; void addUnigramWord(const int *const word, const int length, const int probability); diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index 01bf81864..c94060fa9 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -130,6 +130,10 @@ class ProximityInfoState { return mSampledInputYs[index]; } + int getInputIndexOfSampledPoint(const int sampledIndex) const { + return mSampledInputIndice[sampledIndex]; + } + bool hasSpaceProximity(const int index) const; int getLengthCache(const int index) const { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index e2ef5fc76..e0b1c67d9 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -113,7 +113,9 @@ class DicTraverseSession { if (usedPointerCount != 1) { return false; } - *pointerId = usedPointerId; + if (pointerId) { + *pointerId = usedPointerId; + } return true; } diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index e788e914a..0c925be25 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -117,7 +117,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo * Outputs the final list of suggestions (i.e., terminal nodes). */ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, - int *outputCodePoints, int *spaceIndices, int *outputTypes) const { + int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const { #if DEBUG_EVALUATE_MOST_PROBABLE_STRING const int terminalSize = 0; #else @@ -139,6 +139,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, &outputCodePoints[0], &outputTypes[0], &frequencies[0]); if (hasMostProbableString) { + outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; ++outputWordIndex; } @@ -160,6 +161,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT && terminals[0].hasMultipleWords())) : false; + // TODO: have partial commit work even with multiple pointers. + const bool outputSecondWordFirstLetterInputIndex = + traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; ++terminalIndex) { @@ -194,18 +198,21 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen terminalDicNode->isExactMatch() || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) || (isValidWord && SCORING->doesAutoCorrectValidWord())); - maxScore = max(maxScore, finalScore); - - // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. - // Index for top typing suggestion should be 0. - if (isValidWord && outputWordIndex == 0) { - terminalDicNode->outputSpacePositionsResult(spaceIndices); + if (maxScore < finalScore && isValidWord) { + maxScore = finalScore; } // Don't output invalid words. However, we still need to submit their shortcuts if any. if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; + if (outputSecondWordFirstLetterInputIndex) { + outputIndicesToPartialCommit[outputWordIndex] = + terminalDicNode->getSecondWordFirstInputIndex( + traverseSession->getProximityInfoState(0)); + } else { + outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; + } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); @@ -220,8 +227,19 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex, - finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, + outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, + sameAsTyped); + const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( + traverseSession->getProximityInfoState(0)); + for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) { + if (outputSecondWordFirstLetterInputIndex) { + outputIndicesToPartialCommit[i] = secondWordFirstInputIndex; + } else { + outputIndicesToPartialCommit[i] = NOT_AN_INDEX; + } + } + outputWordIndex = updatedOutputWordIndex; } DicNode::managedDelete(terminalDicNode); } diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 875cbe4e0..b24019632 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -55,7 +55,7 @@ class Suggest : public SuggestInterface { void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, const bool spaceSubstitution) const; int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, - int *outputCodePoints, int *outputIndices, int *outputTypes) const; + int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 3cfbfd85b..2198a13c9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -248,7 +248,9 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary."); return; } - // TODO: Implement. + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy); + writingHelper.writeToDictFile(filePath, mBuffer->getBuffer(), mHeaderPolicy.getSize()); } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 311d31e5d..a67c0d94a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -16,6 +16,9 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include <cstdio> +#include <cstring> + #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" @@ -27,6 +30,8 @@ namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; +const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = + ".tmp"; bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, @@ -131,6 +136,41 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); } +void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, + const uint8_t *const headerBuf, const int headerSize) { + const int tmpFileNameBufSize = strlen(fileName) + + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1; + char tmpFileName[tmpFileNameBufSize]; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + return; + } + // Write header. + if (fwrite(headerBuf, headerSize, 1, file) < 1) { + fclose(file); + remove(tmpFileName); + return; + } + // Write data in original buffer. + if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */), + mBuffer->getOriginalBufferSize(), 1, file) < 1) { + fclose(file); + remove(tmpFileName); + return; + } + // Write data in additional buffer. + if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */), + mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) { + fclose(file); + remove(tmpFileName); + return; + } + fclose(file); + rename(tmpFileName, fileName); +} + bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, const int bigramLinkedNodePos) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 20e35abcf..faf7a4e1b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -17,6 +17,8 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H +#include <stdint.h> + #include "defines.h" namespace latinime { @@ -46,10 +48,14 @@ class DynamicPatriciaTrieWritingHelper { // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); + void writeToDictFile(const char *const fileName, const uint8_t *const headerBuf, + const int headerSize); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 4d231cde7..d8105ba38 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -151,7 +151,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); - probabilityMap.put(word, random.nextInt() & 0xFF); + probabilityMap.put(word, random.nextInt(0xFF)); } for (String word : probabilityMap.keySet()) { binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); @@ -163,8 +163,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWords() { - // TODO: Add a test to check the frequency of the bigram score which uses current value - // calculated in the native code File dictFile = null; try { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); @@ -179,6 +177,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int bigramProbability = 10; + final int updatedBigramProbability = 15; binaryDictionary.addUnigramWord("aaa", unigramProbability); binaryDictionary.addUnigramWord("abb", unigramProbability); binaryDictionary.addUnigramWord("bcc", unigramProbability); @@ -187,21 +186,49 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + + binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); + final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, + updatedBigramProbability); + assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + assertEquals(Dictionary.NOT_A_PROBABILITY, + binaryDictionary.getBigramProbability("bcc", "aaa")); + assertEquals(Dictionary.NOT_A_PROBABILITY, + binaryDictionary.getBigramProbability("bcc", "bbc")); + assertEquals(Dictionary.NOT_A_PROBABILITY, + binaryDictionary.getBigramProbability("aaa", "aaa")); + + // Testing bigram link. + binaryDictionary.addUnigramWord("abcde", unigramProbability); + binaryDictionary.addUnigramWord("fghij", unigramProbability); + binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); + binaryDictionary.addUnigramWord("fgh", unigramProbability); + binaryDictionary.addUnigramWord("abc", unigramProbability); + binaryDictionary.addUnigramWord("f", unigramProbability); + assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); + assertEquals(Dictionary.NOT_A_PROBABILITY, + binaryDictionary.getBigramProbability("abcde", "fgh")); + binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); + assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); dictFile.delete(); } public void testRandomlyAddBigramWords() { - // TODO: Add a test to check the frequency of the bigram score which uses current value - // calculated in the native code final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; @@ -222,29 +249,38 @@ public class BinaryDictionaryTests extends AndroidTestCase { // Test a word that isn't contained within the dictionary. final Random random = new Random(seed); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final int unigramProbability = 100; - final int bigramProbability = 10; + final int[] unigramProbabilities = new int[wordCount]; for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities[i] = unigramProbability; binaryDictionary.addUnigramWord(word, unigramProbability); } - final boolean[][] bigramRelations = new boolean[wordCount][wordCount]; + final int[][] probabilities = new int[wordCount][wordCount]; + + for (int i = 0; i < wordCount; ++i) { + for (int j = 0; j < wordCount; ++j) { + probabilities[i][j] = Dictionary.NOT_A_PROBABILITY; + } + } + for (int i = 0; i < bigramCount; i++) { final int word0Index = random.nextInt(wordCount); final int word1Index = random.nextInt(wordCount); final String word0 = words.get(word0Index); final String word1 = words.get(word1Index); - - bigramRelations[word0Index][word1Index] = true; + final int bigramProbability = random.nextInt(0xF); + probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability( + unigramProbabilities[word1Index], bigramProbability); binaryDictionary.addBigramWords(word0, word1, bigramProbability); } for (int i = 0; i < words.size(); i++) { for (int j = 0; j < words.size(); j++) { - assertEquals(bigramRelations[i][j], - binaryDictionary.isValidBigram(words.get(i), words.get(j))); + assertEquals(probabilities[i][j], + binaryDictionary.getBigramProbability(words.get(i), words.get(j))); } } @@ -263,7 +299,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final int unigramProbability = 100; final int bigramProbability = 10; binaryDictionary.addUnigramWord("aaa", unigramProbability); @@ -299,4 +334,54 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testFlushDictionary() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int probability = 100; + binaryDictionary.addUnigramWord("aaa", probability); + binaryDictionary.addUnigramWord("abcd", probability); + // Close without flushing. + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + assertEquals(-1, binaryDictionary.getFrequency("aaa")); + assertEquals(-1, binaryDictionary.getFrequency("abcd")); + + binaryDictionary.addUnigramWord("aaa", probability); + binaryDictionary.addUnigramWord("abcd", probability); + binaryDictionary.flush(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + assertEquals(probability, binaryDictionary.getFrequency("aaa")); + assertEquals(probability, binaryDictionary.getFrequency("abcd")); + binaryDictionary.addUnigramWord("bcde", probability); + binaryDictionary.flush(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertEquals(probability, binaryDictionary.getFrequency("bcde")); + binaryDictionary.close(); + + dictFile.delete(); + } } diff --git a/tests/src/com/android/inputmethod/latin/InputLogicTests.java b/tests/src/com/android/inputmethod/latin/InputLogicTests.java index fe92be618..cc2569f5e 100644 --- a/tests/src/com/android/inputmethod/latin/InputLogicTests.java +++ b/tests/src/com/android/inputmethod/latin/InputLogicTests.java @@ -134,6 +134,13 @@ public class InputLogicTests extends InputTestsBase { assertEquals("simple auto-correct", EXPECTED_RESULT, mEditText.getText().toString()); } + public void testAutoCorrectWithQuote() { + final String STRING_TO_TYPE = "didn' "; + final String EXPECTED_RESULT = "didn't "; + type(STRING_TO_TYPE); + assertEquals("auto-correct with quote", EXPECTED_RESULT, mEditText.getText().toString()); + } + public void testAutoCorrectWithPeriod() { final String STRING_TO_TYPE = "tgis."; final String EXPECTED_RESULT = "this."; diff --git a/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java b/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java index 4cf83339a..a594baf0b 100644 --- a/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java @@ -64,4 +64,37 @@ public class SuggestedWordsTests extends AndroidTestCase { assertEquals("0", wordsWithoutTyped.getWord(0)); assertEquals(SuggestedWordInfo.KIND_CORRECTION, wordsWithoutTyped.getInfo(0).mKind); } + + // Helper for testGetTransformedWordInfo + private SuggestedWordInfo createWordInfo(final String s) { + // Use 100 as the frequency because the numerical value does not matter as + // long as it's > 1 and < INT_MAX. + return new SuggestedWordInfo(s, 100, + SuggestedWordInfo.KIND_TYPED, null /* sourceDict */, + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); + } + + // Helper for testGetTransformedWordInfo + private SuggestedWordInfo transformWordInfo(final String info, + final int trailingSingleQuotesCount) { + return Suggest.getTransformedSuggestedWordInfo(createWordInfo(info), + Locale.ENGLISH, false /* isAllUpperCase */, false /* isFirstCharCapitalized */, + trailingSingleQuotesCount); + } + + public void testGetTransformedSuggestedWordInfo() { + SuggestedWordInfo result = transformWordInfo("word", 0); + assertEquals(result.mWord, "word"); + result = transformWordInfo("word", 1); + assertEquals(result.mWord, "word'"); + result = transformWordInfo("word", 3); + assertEquals(result.mWord, "word'''"); + result = transformWordInfo("didn't", 0); + assertEquals(result.mWord, "didn't"); + result = transformWordInfo("didn't", 1); + assertEquals(result.mWord, "didn't"); + result = transformWordInfo("didn't", 3); + assertEquals(result.mWord, "didn't''"); + } } |