diff options
-rw-r--r-- | java/src/com/android/inputmethod/keyboard/Key.java | 10 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/keyboard/KeyboardView.java | 3 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/keyboard/LatinKeyboard.java | 15 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java | 15 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/LatinIME.java | 6 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/Utils.java | 5 | ||||
-rw-r--r-- | native/Android.mk | 2 | ||||
-rw-r--r-- | native/src/correction.cpp (renamed from native/src/correction_state.cpp) | 123 | ||||
-rw-r--r-- | native/src/correction.h | 154 | ||||
-rw-r--r-- | native/src/correction_state.h | 118 | ||||
-rw-r--r-- | native/src/proximity_info.h | 2 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 88 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 23 |
13 files changed, 334 insertions, 230 deletions
diff --git a/java/src/com/android/inputmethod/keyboard/Key.java b/java/src/com/android/inputmethod/keyboard/Key.java index 4cc0bba6a..397b7b16b 100644 --- a/java/src/com/android/inputmethod/keyboard/Key.java +++ b/java/src/com/android/inputmethod/keyboard/Key.java @@ -113,6 +113,8 @@ public class Key { private boolean mHighlightOn; /** Key is enabled and responds on press */ private boolean mEnabled = true; + /** Whether this key needs to show the "..." popup hint for special purposes */ + private boolean mNeedsSpecialPopupHint; // keyWidth constants private static final int KEYWIDTH_FILL_RIGHT = 0; @@ -402,6 +404,14 @@ public class Key { return (mLabelOption & LABEL_OPTION_HAS_POPUP_HINT) != 0; } + public void setNeedsSpecialPopupHint(boolean needsSpecialPopupHint) { + mNeedsSpecialPopupHint = needsSpecialPopupHint; + } + + public boolean needsSpecialPopupHint() { + return mNeedsSpecialPopupHint; + } + public boolean hasUppercaseLetter() { return (mLabelOption & LABEL_OPTION_HAS_UPPERCASE_LETTER) != 0; } diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardView.java b/java/src/com/android/inputmethod/keyboard/KeyboardView.java index da3aa50c5..2df2994f6 100644 --- a/java/src/com/android/inputmethod/keyboard/KeyboardView.java +++ b/java/src/com/android/inputmethod/keyboard/KeyboardView.java @@ -657,7 +657,8 @@ public class KeyboardView extends View implements PointerTracker.DrawingProxy { } // Draw popup hint "..." at the bottom right corner of the key. - if (key.hasPopupHint() && key.mPopupCharacters != null && key.mPopupCharacters.length > 0) { + if ((key.hasPopupHint() && key.mPopupCharacters != null && key.mPopupCharacters.length > 0) + || key.needsSpecialPopupHint()) { paint.setTextSize(params.mKeyHintLetterSize); paint.setColor(params.mKeyHintLabelColor); paint.setTextAlign(Align.CENTER); diff --git a/java/src/com/android/inputmethod/keyboard/LatinKeyboard.java b/java/src/com/android/inputmethod/keyboard/LatinKeyboard.java index 3cba5295e..1b6f57b92 100644 --- a/java/src/com/android/inputmethod/keyboard/LatinKeyboard.java +++ b/java/src/com/android/inputmethod/keyboard/LatinKeyboard.java @@ -31,10 +31,12 @@ import android.graphics.drawable.BitmapDrawable; import android.graphics.drawable.Drawable; import android.text.TextUtils; +import com.android.inputmethod.compat.InputMethodManagerCompatWrapper; import com.android.inputmethod.keyboard.internal.KeyboardBuilder; import com.android.inputmethod.keyboard.internal.KeyboardParams; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.SubtypeSwitcher; +import com.android.inputmethod.latin.Utils; import java.lang.ref.SoftReference; import java.util.Arrays; @@ -59,6 +61,7 @@ public class LatinKeyboard extends Keyboard { private float mSpacebarTextFadeFactor = 0.0f; private final HashMap<Integer, SoftReference<BitmapDrawable>> mSpaceDrawableCache = new HashMap<Integer, SoftReference<BitmapDrawable>>(); + private final boolean mIsSpacebarTriggeringPopupByLongPress; /* Shortcut key and its icons if available */ private final Key mShortcutKey; @@ -85,6 +88,9 @@ public class LatinKeyboard extends Keyboard { mShortcutKey = params.mShortcutKey; mEnabledShortcutIcon = (mShortcutKey != null) ? mShortcutKey.getIcon() : null; + final int longPressSpaceKeyTimeout = + mRes.getInteger(R.integer.config_long_press_space_key_timeout); + mIsSpacebarTriggeringPopupByLongPress = (longPressSpaceKeyTimeout > 0); final TypedArray a = context.obtainStyledAttributes( null, R.styleable.LatinKeyboard, R.attr.latinKeyboardStyle, R.style.LatinKeyboard); @@ -179,8 +185,13 @@ public class LatinKeyboard extends Keyboard { } private void updateSpacebarForLocale(boolean isAutoCorrection) { - if (mSpaceKey == null) - return; + if (mSpaceKey == null) return; + final InputMethodManagerCompatWrapper imm = InputMethodManagerCompatWrapper.getInstance(); + if (imm == null) return; + // The "..." popup hint for triggering something by a long-pressing the spacebar + final boolean shouldShowInputMethodPicker = mIsSpacebarTriggeringPopupByLongPress + && Utils.hasMultipleEnabledIMEsOrSubtypes(imm, true /* include aux subtypes */); + mSpaceKey.setNeedsSpecialPopupHint(shouldShowInputMethodPicker); // If application locales are explicitly selected. if (mSubtypeSwitcher.needsToDisplayLanguage()) { mSpaceKey.setIcon(getSpaceDrawable( diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java index b8850680b..2d50a6f46 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java @@ -23,6 +23,7 @@ import android.content.res.Resources; import android.database.Cursor; import android.net.Uri; import android.text.TextUtils; +import android.util.Log; import java.io.File; import java.io.FileInputStream; @@ -40,6 +41,8 @@ import java.util.Locale; * file from the dictionary provider */ public class BinaryDictionaryFileDumper { + private static final String TAG = BinaryDictionaryFileDumper.class.getSimpleName(); + /** * The size of the temporary buffer to copy files. */ @@ -79,8 +82,16 @@ public class BinaryDictionaryFileDumper { * Find out the cache directory associated with a specific locale. */ private static String getCacheDirectoryForLocale(Locale locale, Context context) { - final String directoryName = replaceFileNameDangerousCharacters(locale.toString()); - return context.getFilesDir() + File.separator + directoryName; + final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString()); + final String absoluteDirectoryName = context.getFilesDir() + File.separator + + relativeDirectoryName; + final File directory = new File(absoluteDirectoryName); + if (!directory.exists()) { + if (!directory.mkdirs()) { + Log.e(TAG, "Could not create the directory for locale" + locale); + } + } + return absoluteDirectoryName; } /** diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index d74babf4f..c28e40d95 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1181,8 +1181,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar if (isShowingOptionDialog()) return; if (InputMethodServiceCompatWrapper.CAN_HANDLE_ON_CURRENT_INPUT_METHOD_SUBTYPE_CHANGED) { showSubtypeSelectorAndSettings(); - } else if (Utils.hasMultipleEnabledIMEsOrSubtypes(mImm, - false /* should exclude auxiliary subtypes */)) { + } else if (Utils.hasMultipleEnabledIMEsOrSubtypes(mImm, false /* exclude aux subtypes */)) { showOptionsMenu(); } else { launchSettings(); @@ -1197,8 +1196,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar if (isShowingOptionDialog()) return false; switch (requestCode) { case CODE_SHOW_INPUT_METHOD_PICKER: - if (Utils.hasMultipleEnabledIMEsOrSubtypes(mImm, - true /* should include auxiliary subtypes */)) { + if (Utils.hasMultipleEnabledIMEsOrSubtypes(mImm, true /* include aux subtypes */)) { mImm.showInputMethodPicker(); return true; } diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java index c07793c33..1a6260a4e 100644 --- a/java/src/com/android/inputmethod/latin/Utils.java +++ b/java/src/com/android/inputmethod/latin/Utils.java @@ -111,8 +111,9 @@ public class Utils { } } - public static boolean hasMultipleEnabledIMEsOrSubtypes(InputMethodManagerCompatWrapper imm, - boolean shouldIncludeAuxiliarySubtypes) { + public static boolean hasMultipleEnabledIMEsOrSubtypes( + final InputMethodManagerCompatWrapper imm, + final boolean shouldIncludeAuxiliarySubtypes) { final List<InputMethodInfoCompatWrapper> enabledImis = imm.getEnabledInputMethodList(); // Number of the filtered IMEs diff --git a/native/Android.mk b/native/Android.mk index 04819e456..f07be6abe 100644 --- a/native/Android.mk +++ b/native/Android.mk @@ -14,7 +14,7 @@ LOCAL_SRC_FILES := \ jni/jni_common.cpp \ src/bigram_dictionary.cpp \ src/char_utils.cpp \ - src/correction_state.cpp \ + src/correction.cpp \ src/dictionary.cpp \ src/proximity_info.cpp \ src/unigram_dictionary.cpp diff --git a/native/src/correction_state.cpp b/native/src/correction.cpp index 0de11ce19..a931a61fb 100644 --- a/native/src/correction_state.cpp +++ b/native/src/correction.cpp @@ -18,9 +18,9 @@ #include <stdio.h> #include <string.h> -#define LOG_TAG "LatinIME: correction_state.cpp" +#define LOG_TAG "LatinIME: correction.cpp" -#include "correction_state.h" +#include "correction.h" #include "proximity_info.h" namespace latinime { @@ -30,20 +30,20 @@ namespace latinime { ////////////////////// static const char QUOTE = '\''; -inline bool CorrectionState::isQuote(const unsigned short c) { +inline bool Correction::isQuote(const unsigned short c) { const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); return (c == QUOTE && userTypedChar != QUOTE); } -///////////////////// -// CorrectionState // -///////////////////// +//////////////// +// Correction // +//////////////// -CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier) +Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier) : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) { } -void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength, +void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, const int maxDepth) { mProximityInfo = pi; mInputLength = inputLength; @@ -52,7 +52,12 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp mSkippedOutputIndex = -1; } -void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos, +void Correction::initCorrectionState( + const int rootPos, const int childCount, const bool traverseAll) { + mCorrectionStates[0].init(rootPos, childCount, traverseAll); +} + +void Correction::setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos) { mSkipPos = skipPos; mExcessivePos = excessivePos; @@ -61,7 +66,7 @@ void CorrectionState::setCorrectionParams(const int skipPos, const int excessive mMissingSpacePos = missingSpacePos; } -void CorrectionState::checkState() { +void Correction::checkState() { if (DEBUG_DICT) { int inputCount = 0; if (mSkipPos >= 0) ++inputCount; @@ -72,11 +77,11 @@ void CorrectionState::checkState() { } } -int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) { - return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); +int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) { + return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); } -int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { +int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; @@ -86,65 +91,75 @@ int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wo *word = mWord; const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2) : (mInputLength == inputIndex + 1); - return CorrectionState::RankingAlgorithm::calculateFinalFreq( + return Correction::RankingAlgorithm::calculateFinalFreq( inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this); } -void CorrectionState::initProcessState(const int matchCount, const int inputIndex, - const int outputIndex, const bool traverseAllNodes, const int diffs) { - mMatchedCharCount = matchCount; - mInputIndex = inputIndex; +bool Correction::initProcessState(const int outputIndex) { + if (mCorrectionStates[outputIndex].mChildCount <= 0) { + return false; + } mOutputIndex = outputIndex; - mTraverseAllNodes = traverseAllNodes; - mDiffs = diffs; + --(mCorrectionStates[outputIndex].mChildCount); + mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount; + mInputIndex = mCorrectionStates[outputIndex].mInputIndex; + mTraverseAllNodes = mCorrectionStates[outputIndex].mTraverseAll; + mDiffs = mCorrectionStates[outputIndex].mDiffs; + return true; } -void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, - bool *traverseAllNodes, int *diffs) { - *matchedCount = mMatchedCharCount; - *inputIndex = mInputIndex; - *outputIndex = mOutputIndex; - *traverseAllNodes = mTraverseAllNodes; - *diffs = mDiffs; +int Correction::goDownTree( + const int parentIndex, const int childCount, const int firstChildPos) { + mCorrectionStates[mOutputIndex].mParentIndex = parentIndex; + mCorrectionStates[mOutputIndex].mChildCount = childCount; + mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos; + return mOutputIndex; } -void CorrectionState::charMatched() { +void Correction::charMatched() { ++mMatchedCharCount; } // TODO: remove -int CorrectionState::getOutputIndex() { +int Correction::getOutputIndex() { return mOutputIndex; } // TODO: remove -int CorrectionState::getInputIndex() { +int Correction::getInputIndex() { return mInputIndex; } // TODO: remove -bool CorrectionState::needsToTraverseAll() { +bool Correction::needsToTraverseAll() { return mTraverseAllNodes; } -void CorrectionState::incrementInputIndex() { +void Correction::incrementInputIndex() { ++mInputIndex; } -void CorrectionState::incrementOutputIndex() { +void Correction::incrementOutputIndex() { ++mOutputIndex; + mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; + mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; + mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; + mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount; + mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; + mCorrectionStates[mOutputIndex].mTraverseAll = mTraverseAllNodes; + mCorrectionStates[mOutputIndex].mDiffs = mDiffs; } -void CorrectionState::startTraverseAll() { +void Correction::startTraverseAll() { mTraverseAllNodes = true; } -bool CorrectionState::needsToPrune() const { +bool Correction::needsToPrune() const { return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth) || mDiffs > mMaxEditDistance); } -CorrectionState::CorrectionStateType CorrectionState::processSkipChar( +Correction::CorrectionType Correction::processSkipChar( const int32_t c, const bool isTerminal) { mWord[mOutputIndex] = c; if (needsToTraverseAll() && isTerminal) { @@ -158,9 +173,9 @@ CorrectionState::CorrectionStateType CorrectionState::processSkipChar( } } -CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState( +Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { - CorrectionStateType currentStateType = NOT_ON_TERMINAL; + CorrectionType currentStateType = NOT_ON_TERMINAL; // This has to be done for each virtual char (this forwards the "inputIndex" which // is the index in the user-inputted chars, as read by proximity chars. if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) { @@ -249,7 +264,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState( return currentStateType; } -CorrectionState::~CorrectionState() { +Correction::~Correction() { } ///////////////////////// @@ -302,17 +317,17 @@ inline static void multiplyRate(const int rate, int *freq) { // RankingAlgorithm // ////////////////////// -int CorrectionState::RankingAlgorithm::calculateFinalFreq( +int Correction::RankingAlgorithm::calculateFinalFreq( const int inputIndex, const int outputIndex, const int matchCount, const int freq, const bool sameLength, - const CorrectionState* correctionState) { - const int skipPos = correctionState->getSkipPos(); - const int excessivePos = correctionState->getExcessivePos(); - const int transposedPos = correctionState->getTransposedPos(); - const int inputLength = correctionState->mInputLength; - const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER; - const int fullWordMultiplier = correctionState->FULL_WORD_MULTIPLIER; - const ProximityInfo *proximityInfo = correctionState->mProximityInfo; + const Correction* correction) { + const int skipPos = correction->getSkipPos(); + const int excessivePos = correction->getExcessivePos(); + const int transposedPos = correction->getTransposedPos(); + const int inputLength = correction->mInputLength; + const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; + const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; + const ProximityInfo *proximityInfo = correction->mProximityInfo; const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); // TODO: Demote by edit distance @@ -370,10 +385,10 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq( return finalFreq; } -int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords( - const int firstFreq, const int secondFreq, const CorrectionState* correctionState) { - const int spaceProximityPos = correctionState->mSpaceProximityPos; - const int missingSpacePos = correctionState->mMissingSpacePos; +int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( + const int firstFreq, const int secondFreq, const Correction* correction) { + const int spaceProximityPos = correction->mSpaceProximityPos; + const int missingSpacePos = correction->mMissingSpacePos; if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; @@ -381,12 +396,12 @@ int CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords( assert(inputCount <= 1); } const bool isSpaceProximity = spaceProximityPos >= 0; - const int inputLength = correctionState->mInputLength; + const int inputLength = correction->mInputLength; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); - const int typedLetterMultiplier = correctionState->TYPED_LETTER_MULTIPLIER; + const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; if (firstWordLength == 0 || secondWordLength == 0) { return 0; diff --git a/native/src/correction.h b/native/src/correction.h new file mode 100644 index 000000000..590d62f62 --- /dev/null +++ b/native/src/correction.h @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_CORRECTION_H +#define LATINIME_CORRECTION_H + +#include <stdint.h> +#include "correction_state.h" + +#include "defines.h" + +namespace latinime { + +class ProximityInfo; + +class Correction { + +public: + typedef enum { + TRAVERSE_ALL_ON_TERMINAL, + TRAVERSE_ALL_NOT_ON_TERMINAL, + UNRELATED, + ON_TERMINAL, + NOT_ON_TERMINAL + } CorrectionType; + + Correction(const int typedLetterMultiplier, const int fullWordMultiplier); + void initCorrection( + const ProximityInfo *pi, const int inputLength, const int maxWordLength); + void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); + + // TODO: remove + void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, + const int spaceProximityPos, const int missingSpacePos); + void checkState(); + bool initProcessState(const int index); + + void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, + bool *traverseAllNodes, int *diffs); + int getOutputIndex(); + int getInputIndex(); + bool needsToTraverseAll(); + + virtual ~Correction(); + int getSpaceProximityPos() const { + return mSpaceProximityPos; + } + int getMissingSpacePos() const { + return mMissingSpacePos; + } + + int getSkipPos() const { + return mSkipPos; + } + + int getExcessivePos() const { + return mExcessivePos; + } + + int getTransposedPos() const { + return mTransposedPos; + } + + bool needsToPrune() const; + + int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); + int getFinalFreq(const int freq, unsigned short **word, int* wordLength); + + CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); + + int getDiffs() const { + return mDiffs; + } + + ///////////////////////// + // Tree helper methods + int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); + + inline int getTreeSiblingPos(const int index) const { + return mCorrectionStates[index].mSiblingPos; + } + + inline void setTreeSiblingPos(const int index, const int pos) { + mCorrectionStates[index].mSiblingPos = pos; + } + + inline int getTreeParentIndex(const int index) const { + return mCorrectionStates[index].mParentIndex; + } +private: + void charMatched(); + void incrementInputIndex(); + void incrementOutputIndex(); + void startTraverseAll(); + + // TODO: remove + + void incrementDiffs() { + ++mDiffs; + } + + const int TYPED_LETTER_MULTIPLIER; + const int FULL_WORD_MULTIPLIER; + + const ProximityInfo *mProximityInfo; + + int mMaxEditDistance; + int mMaxDepth; + int mInputLength; + int mSkipPos; + int mSkippedOutputIndex; + int mExcessivePos; + int mTransposedPos; + int mSpaceProximityPos; + int mMissingSpacePos; + + int mMatchedCharCount; + int mInputIndex; + int mOutputIndex; + int mTerminalInputIndex; + int mTerminalOutputIndex; + int mDiffs; + bool mTraverseAllNodes; + unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + + CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL]; + + inline bool isQuote(const unsigned short c); + inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal); + + class RankingAlgorithm { + public: + static int calculateFinalFreq(const int inputIndex, const int depth, + const int matchCount, const int freq, const bool sameLength, + const Correction* correction); + static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, + const Correction* correction); + }; +}; +} // namespace latinime +#endif // LATINIME_CORRECTION_H diff --git a/native/src/correction_state.h b/native/src/correction_state.h index 7ea5aa37d..1fe02b853 100644 --- a/native/src/correction_state.h +++ b/native/src/correction_state.h @@ -23,110 +23,32 @@ namespace latinime { -class ProximityInfo; - class CorrectionState { - public: - typedef enum { - TRAVERSE_ALL_ON_TERMINAL, - TRAVERSE_ALL_NOT_ON_TERMINAL, - UNRELATED, - ON_TERMINAL, - NOT_ON_TERMINAL - } CorrectionStateType; - - CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier); - void initCorrectionState( - const ProximityInfo *pi, const int inputLength, const int maxWordLength); - void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos); - void checkState(); - void initProcessState(const int matchCount, const int inputIndex, const int outputIndex, - const bool traverseAllNodes, const int diffs); - void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, - bool *traverseAllNodes, int *diffs); - int getOutputIndex(); - int getInputIndex(); - bool needsToTraverseAll(); - - virtual ~CorrectionState(); - int getSpaceProximityPos() const { - return mSpaceProximityPos; - } - int getMissingSpacePos() const { - return mMissingSpacePos; - } - - int getSkipPos() const { - return mSkipPos; - } - - int getExcessivePos() const { - return mExcessivePos; - } + int mParentIndex; + int mMatchedCount; + int mChildCount; + int mInputIndex; + int mDiffs; + int mSiblingPos; + bool mTraverseAll; - int getTransposedPos() const { - return mTransposedPos; + inline void init(const int rootPos, const int childCount, const bool traverseAll) { + set(-1, 0, childCount, 0, 0, rootPos, traverseAll); } - bool needsToPrune() const; - - int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); - int getFinalFreq(const int freq, unsigned short **word, int* wordLength); - - CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal); - - int getDiffs() const { - return mDiffs; - } private: - void charMatched(); - void incrementInputIndex(); - void incrementOutputIndex(); - void startTraverseAll(); - - // TODO: remove - - void incrementDiffs() { - ++mDiffs; + inline void set(const int parentIndex, const int matchedCount, const int childCount, + const int inputIndex, const int diffs, const int siblingPos, + const bool traverseAll) { + mParentIndex = parentIndex; + mMatchedCount = matchedCount; + mChildCount = childCount; + mInputIndex = inputIndex; + mDiffs = diffs; + mSiblingPos = siblingPos; + mTraverseAll = traverseAll; } - - const int TYPED_LETTER_MULTIPLIER; - const int FULL_WORD_MULTIPLIER; - - const ProximityInfo *mProximityInfo; - - int mMaxEditDistance; - int mMaxDepth; - int mInputLength; - int mSkipPos; - int mSkippedOutputIndex; - int mExcessivePos; - int mTransposedPos; - int mSpaceProximityPos; - int mMissingSpacePos; - - int mMatchedCharCount; - int mInputIndex; - int mOutputIndex; - int mTerminalInputIndex; - int mTerminalOutputIndex; - int mDiffs; - bool mTraverseAllNodes; - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; - - inline bool isQuote(const unsigned short c); - inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal); - - class RankingAlgorithm { - public: - static int calculateFinalFreq(const int inputIndex, const int depth, - const int matchCount, const int freq, const bool sameLength, - const CorrectionState* correctionState); - static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, - const CorrectionState* correctionState); - }; }; } // namespace latinime -#endif // LATINIME_CORRECTION_INFO_H +#endif // LATINIME_CORRECTION_STATE_H diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index a9477e41a..5034c3b89 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -23,7 +23,7 @@ namespace latinime { -class CorrectionState; +class Correction; class ProximityInfo { public: diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 93d2b8418..bbfaea454 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -48,11 +48,11 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed if (DEBUG_DICT) { LOGI("UnigramDictionary - constructor"); } - mCorrectionState = new CorrectionState(typedLetterMultiplier, fullWordMultiplier); + mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); } UnigramDictionary::~UnigramDictionary() { - delete mCorrectionState; + delete mCorrection; } static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, @@ -184,7 +184,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) assert(codesSize == mInputLength); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); - mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth); + mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth); PROF_END(0); PROF_START(1); @@ -237,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { LOGI("--- Suggest missing space characters %d", i); } - getMissingSpaceWords(mInputLength, i, mCorrectionState); + getMissingSpaceWords(mInputLength, i, mCorrection); } } PROF_END(5); @@ -256,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords(mInputLength, i, mCorrectionState); + getMistypedSpaceWords(mInputLength, i, mCorrection); } } } @@ -347,49 +347,33 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, assert(excessivePos < mInputLength); assert(missingPos < mInputLength); } - mCorrectionState->setCorrectionParams(skipPos, excessivePos, transposedPos, + mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos, -1 /* spaceProximityPos */, -1 /* missingSpacePos */); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); - int depth = 0; + int outputIndex = 0; - mStackChildCount[0] = childCount; - mStackTraverseAll[0] = (mInputLength <= 0); - mStackInputIndex[0] = 0; - mStackDiffs[0] = 0; - mStackSiblingPos[0] = rootPosition; - mStackOutputIndex[0] = 0; - mStackMatchedCount[0] = 0; + mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0)); // Depth first search - while (depth >= 0) { - if (mStackChildCount[depth] > 0) { - --mStackChildCount[depth]; - int siblingPos = mStackSiblingPos[depth]; + while (outputIndex >= 0) { + if (mCorrection->initProcessState(outputIndex)) { + int siblingPos = mCorrection->getTreeSiblingPos(outputIndex); int firstChildPos; - mCorrectionState->initProcessState( - mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth], - mStackTraverseAll[depth], mStackDiffs[depth]); - // needsToTraverseChildrenNodes should be false const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - mCorrectionState, &childCount, &firstChildPos, &siblingPos); + mCorrection, &childCount, &firstChildPos, &siblingPos); // Update next sibling pos - mStackSiblingPos[depth] = siblingPos; + mCorrection->setTreeSiblingPos(outputIndex, siblingPos); + if (needsToTraverseChildrenNodes) { // Goes to child node - ++depth; - mStackChildCount[depth] = childCount; - mStackSiblingPos[depth] = firstChildPos; - - mCorrectionState->getProcessState(&mStackMatchedCount[depth], - &mStackInputIndex[depth], &mStackOutputIndex[depth], - &mStackTraverseAll[depth], &mStackDiffs[depth]); + outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos); } } else { // Goes to parent sibling node - --depth; + outputIndex = mCorrection->getTreeParentIndex(outputIndex); } } } @@ -409,17 +393,17 @@ inline static void multiplyIntCapped(const int multiplier, int *base) { } void UnigramDictionary::getMissingSpaceWords( - const int inputLength, const int missingSpacePos, CorrectionState *correctionState) { - correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, + const int inputLength, const int missingSpacePos, Correction *correction) { + correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos); - getSplitTwoWordsSuggestion(inputLength, correctionState); + getSplitTwoWordsSuggestion(inputLength, correction); } void UnigramDictionary::getMistypedSpaceWords( - const int inputLength, const int spaceProximityPos, CorrectionState *correctionState) { - correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, + const int inputLength, const int spaceProximityPos, Correction *correction) { + correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */); - getSplitTwoWordsSuggestion(inputLength, correctionState); + getSplitTwoWordsSuggestion(inputLength, correction); } inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, @@ -429,19 +413,19 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; } -inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) { +inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) { int wordLength; unsigned short* wordPointer; - const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength); + const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { addWord(wordPointer, wordLength, finalFreq); } } void UnigramDictionary::getSplitTwoWordsSuggestion( - const int inputLength, CorrectionState* correctionState) { - const int spaceProximityPos = correctionState->getSpaceProximityPos(); - const int missingSpacePos = correctionState->getMissingSpacePos(); + const int inputLength, Correction* correction) { + const int spaceProximityPos = correction->getSpaceProximityPos(); + const int missingSpacePos = correction->getMissingSpacePos(); if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; @@ -485,7 +469,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i - firstWordLength - 1]; } - const int pairFreq = mCorrectionState->getFreqForSplitTwoWords(firstFreq, secondFreq); + const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } @@ -650,10 +634,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, - CorrectionState *correctionState, int *newCount, + Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition) { if (DEBUG_DICT) { - correctionState->checkState(); + correction->checkState(); } int pos = initialPos; @@ -697,12 +681,12 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // If we are on the last char, this virtual node is a terminal if this node is. const bool isTerminal = isLastChar && isTerminalNode; - CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState( + Correction::CorrectionType stateType = correction->processCharAndCalcState( c, isTerminal); - if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL - || stateType == CorrectionState::ON_TERMINAL) { + if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL + || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == CorrectionState::UNRELATED) { + } else if (stateType == Correction::UNRELATED) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining @@ -730,7 +714,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(freq, mCorrectionState); + onTerminal(freq, mCorrection); } // If there are more chars in this node, then this virtual node has children. @@ -751,7 +735,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, } // Optimization: Prune out words that are too long compared to how much was typed. - if (correctionState->needsToPrune()) { + if (correction->needsToPrune()) { pos = BinaryFormat::skipFrequency(flags, pos); *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index a45df24fb..cfe63ff79 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -18,6 +18,7 @@ #define LATINIME_UNIGRAM_DICTIONARY_H #include <stdint.h> +#include "correction.h" #include "correction_state.h" #include "defines.h" #include "proximity_info.h" @@ -89,17 +90,17 @@ private: void getSuggestionCandidates(const int skipPos, const int excessivePos, const int transposedPos); bool addWord(unsigned short *word, int length, int frequency); - void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState); + void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); void getMissingSpaceWords( - const int inputLength, const int missingSpacePos, CorrectionState *correctionState); + const int inputLength, const int missingSpacePos, Correction *correction); void getMistypedSpaceWords( - const int inputLength, const int spaceProximityPos, CorrectionState *correctionState); - void onTerminal(const int freq, CorrectionState *correctionState); + const int inputLength, const int spaceProximityPos, Correction *correction); + void onTerminal(const int freq, Correction *correction); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character bool processCurrentNode(const int initialPos, - CorrectionState *correctionState, int *newCount, + Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, unsigned short *word); @@ -129,18 +130,14 @@ private: int *mFrequencies; unsigned short *mOutputChars; ProximityInfo *mProximityInfo; - CorrectionState *mCorrectionState; + Correction *mCorrection; int mInputLength; // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; - int mStackMatchedCount[MAX_WORD_LENGTH_INTERNAL]; - int mStackChildCount[MAX_WORD_LENGTH_INTERNAL]; - bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL]; - int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL]; - int mStackDiffs[MAX_WORD_LENGTH_INTERNAL]; - int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; - int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL]; + int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove + int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove + int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove }; } // namespace latinime |