diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/ExpandableDictionary.java | 2 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/SuggestionsView.java | 169 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/UserDictionary.java | 3 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/Utils.java | 12 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java | 12 | ||||
-rw-r--r-- | native/src/correction.cpp | 105 | ||||
-rw-r--r-- | native/src/correction.h | 5 | ||||
-rw-r--r-- | native/src/defines.h | 4 | ||||
-rw-r--r-- | native/src/dictionary.cpp | 5 | ||||
-rw-r--r-- | native/src/dictionary.h | 6 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 61 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 17 | ||||
-rw-r--r-- | native/src/words_priority_queue.h | 58 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 53 |
14 files changed, 303 insertions, 209 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index cad69bb0e..7eec8e2ca 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -51,6 +51,7 @@ public class ExpandableDictionary extends Dictionary { private Object mUpdatingLock = new Object(); private static class Node { + Node() {} char mCode; int mFrequency; boolean mTerminal; @@ -547,6 +548,7 @@ public class ExpandableDictionary extends Dictionary { } private class LoadDictionaryTask extends Thread { + LoadDictionaryTask() {} @Override public void run() { loadDictionaryAsync(); diff --git a/java/src/com/android/inputmethod/latin/SuggestionsView.java b/java/src/com/android/inputmethod/latin/SuggestionsView.java index 10f5ec9db..dbd4677f0 100644 --- a/java/src/com/android/inputmethod/latin/SuggestionsView.java +++ b/java/src/com/android/inputmethod/latin/SuggestionsView.java @@ -468,6 +468,91 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, setLayoutWeight( hintView, 1.0f - mCenterSuggestionWeight, ViewGroup.LayoutParams.MATCH_PARENT); } + + private static CharSequence getDebugInfo(SuggestedWords suggestions, int pos) { + if (DBG && pos < suggestions.size()) { + final SuggestedWordInfo wordInfo = suggestions.getInfo(pos); + if (wordInfo != null) { + final CharSequence debugInfo = wordInfo.getDebugString(); + if (!TextUtils.isEmpty(debugInfo)) { + return debugInfo; + } + } + } + return null; + } + + private static void setLayoutWeight(View v, float weight, int height) { + final ViewGroup.LayoutParams lp = v.getLayoutParams(); + if (lp instanceof LinearLayout.LayoutParams) { + final LinearLayout.LayoutParams llp = (LinearLayout.LayoutParams)lp; + llp.weight = weight; + llp.width = 0; + llp.height = height; + } + } + + private static float getTextScaleX(CharSequence text, int maxWidth, TextPaint paint) { + paint.setTextScaleX(1.0f); + final int width = getTextWidth(text, paint); + if (width <= maxWidth) { + return 1.0f; + } + return maxWidth / (float)width; + } + + private static CharSequence getEllipsizedText(CharSequence text, int maxWidth, + TextPaint paint) { + if (text == null) return null; + paint.setTextScaleX(1.0f); + final int width = getTextWidth(text, paint); + if (width <= maxWidth) { + return text; + } + final float scaleX = maxWidth / (float)width; + if (scaleX >= MIN_TEXT_XSCALE) { + paint.setTextScaleX(scaleX); + return text; + } + + // Note that TextUtils.ellipsize() use text-x-scale as 1.0 if ellipsize is needed. To + // get squeezed and ellipsized text, passes enlarged width (maxWidth / MIN_TEXT_XSCALE). + final CharSequence ellipsized = TextUtils.ellipsize( + text, paint, maxWidth / MIN_TEXT_XSCALE, TextUtils.TruncateAt.MIDDLE); + paint.setTextScaleX(MIN_TEXT_XSCALE); + return ellipsized; + } + + private static int getTextWidth(CharSequence text, TextPaint paint) { + if (TextUtils.isEmpty(text)) return 0; + final Typeface savedTypeface = paint.getTypeface(); + paint.setTypeface(getTextTypeface(text)); + final int len = text.length(); + final float[] widths = new float[len]; + final int count = paint.getTextWidths(text, 0, len, widths); + int width = 0; + for (int i = 0; i < count; i++) { + width += Math.round(widths[i] + 0.5f); + } + paint.setTypeface(savedTypeface); + return width; + } + + private static Typeface getTextTypeface(CharSequence text) { + if (!(text instanceof SpannableString)) + return Typeface.DEFAULT; + + final SpannableString ss = (SpannableString)text; + final StyleSpan[] styles = ss.getSpans(0, text.length(), StyleSpan.class); + if (styles.length == 0) + return Typeface.DEFAULT; + + switch (styles[0].getStyle()) { + case Typeface.BOLD: return Typeface.DEFAULT_BOLD; + // TODO: BOLD_ITALIC, ITALIC case? + default: return Typeface.DEFAULT; + } + } } /** @@ -554,90 +639,6 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, mParams.layout(mSuggestions, mSuggestionsStrip, this, getWidth()); } - private static CharSequence getDebugInfo(SuggestedWords suggestions, int pos) { - if (DBG && pos < suggestions.size()) { - final SuggestedWordInfo wordInfo = suggestions.getInfo(pos); - if (wordInfo != null) { - final CharSequence debugInfo = wordInfo.getDebugString(); - if (!TextUtils.isEmpty(debugInfo)) { - return debugInfo; - } - } - } - return null; - } - - private static void setLayoutWeight(View v, float weight, int height) { - final ViewGroup.LayoutParams lp = v.getLayoutParams(); - if (lp instanceof LinearLayout.LayoutParams) { - final LinearLayout.LayoutParams llp = (LinearLayout.LayoutParams)lp; - llp.weight = weight; - llp.width = 0; - llp.height = height; - } - } - - private static float getTextScaleX(CharSequence text, int maxWidth, TextPaint paint) { - paint.setTextScaleX(1.0f); - final int width = getTextWidth(text, paint); - if (width <= maxWidth) { - return 1.0f; - } - return maxWidth / (float)width; - } - - private static CharSequence getEllipsizedText(CharSequence text, int maxWidth, - TextPaint paint) { - if (text == null) return null; - paint.setTextScaleX(1.0f); - final int width = getTextWidth(text, paint); - if (width <= maxWidth) { - return text; - } - final float scaleX = maxWidth / (float)width; - if (scaleX >= MIN_TEXT_XSCALE) { - paint.setTextScaleX(scaleX); - return text; - } - - // Note that TextUtils.ellipsize() use text-x-scale as 1.0 if ellipsize is needed. To get - // squeezed and ellipsized text, passes enlarged width (maxWidth / MIN_TEXT_XSCALE). - final CharSequence ellipsized = TextUtils.ellipsize( - text, paint, maxWidth / MIN_TEXT_XSCALE, TextUtils.TruncateAt.MIDDLE); - paint.setTextScaleX(MIN_TEXT_XSCALE); - return ellipsized; - } - - private static int getTextWidth(CharSequence text, TextPaint paint) { - if (TextUtils.isEmpty(text)) return 0; - final Typeface savedTypeface = paint.getTypeface(); - paint.setTypeface(getTextTypeface(text)); - final int len = text.length(); - final float[] widths = new float[len]; - final int count = paint.getTextWidths(text, 0, len, widths); - int width = 0; - for (int i = 0; i < count; i++) { - width += Math.round(widths[i] + 0.5f); - } - paint.setTypeface(savedTypeface); - return width; - } - - private static Typeface getTextTypeface(CharSequence text) { - if (!(text instanceof SpannableString)) - return Typeface.DEFAULT; - - final SpannableString ss = (SpannableString)text; - final StyleSpan[] styles = ss.getSpans(0, text.length(), StyleSpan.class); - if (styles.length == 0) - return Typeface.DEFAULT; - - switch (styles[0].getStyle()) { - case Typeface.BOLD: return Typeface.DEFAULT_BOLD; - // TODO: BOLD_ITALIC, ITALIC case? - default: return Typeface.DEFAULT; - } - } public boolean isShowingAddToDictionaryHint() { return mSuggestionsStrip.getChildCount() > 0 diff --git a/java/src/com/android/inputmethod/latin/UserDictionary.java b/java/src/com/android/inputmethod/latin/UserDictionary.java index 8c28324d8..6d6296e10 100644 --- a/java/src/com/android/inputmethod/latin/UserDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserDictionary.java @@ -18,13 +18,10 @@ package com.android.inputmethod.latin; import android.content.ContentProviderClient; import android.content.ContentResolver; -import android.content.ContentValues; import android.content.Context; import android.content.Intent; import android.database.ContentObserver; import android.database.Cursor; -import android.net.Uri; -import android.os.RemoteException; import android.provider.UserDictionary.Words; import android.text.TextUtils; diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java index a134647f2..bfa51897e 100644 --- a/java/src/com/android/inputmethod/latin/Utils.java +++ b/java/src/com/android/inputmethod/latin/Utils.java @@ -544,13 +544,13 @@ public class Utils { final String currentDateTimeString = new SimpleDateFormat("yyyyMMdd-HHmmssZ").format(date); if (mFile == null) { - Log.w(TAG, "No internal log file found."); + Log.w(USABILITY_TAG, "No internal log file found."); return; } if (mIms.checkCallingOrSelfPermission( android.Manifest.permission.WRITE_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) { - Log.w(TAG, "Doesn't have the permission WRITE_EXTERNAL_STORAGE"); + Log.w(USABILITY_TAG, "Doesn't have the permission WRITE_EXTERNAL_STORAGE"); return; } mWriter.flush(); @@ -564,20 +564,20 @@ public class Utils { src.close(); dest.close(); } catch (FileNotFoundException e1) { - Log.w(TAG, e1); + Log.w(USABILITY_TAG, e1); return; } catch (IOException e2) { - Log.w(TAG, e2); + Log.w(USABILITY_TAG, e2); return; } if (destFile == null || !destFile.exists()) { - Log.w(TAG, "Dest file doesn't exist."); + Log.w(USABILITY_TAG, "Dest file doesn't exist."); return; } final Intent intent = new Intent(Intent.ACTION_SEND); intent.setFlags(Intent.FLAG_ACTIVITY_NEW_TASK); if (LatinImeLogger.sDBG) { - Log.d(TAG, "Destination file URI is " + destFile.toURI()); + Log.d(USABILITY_TAG, "Destination file URI is " + destFile.toURI()); } intent.setType("text/plain"); intent.putExtra(Intent.EXTRA_STREAM, Uri.parse("file://" + destPath)); diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java index 9a2bebfdf..2bc2cfdf6 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java @@ -43,7 +43,7 @@ public class SpellCheckerProximityInfo { return result; } - static class Latin { + private static class Latin { // This is a map from the code point to the index in the PROXIMITY array. // At the time the native code to read the binary dictionary needs the proximity info be // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input @@ -62,7 +62,7 @@ public class SpellCheckerProximityInfo { // to spell check has been entered with one of the keyboards above. Also, specifically // to English, many spelling errors consist of the last vowel of the word being wrong // because in English vowels tend to merge with each other in pronunciation. - final private static int[] PROXIMITY = { + final static int[] PROXIMITY = { 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, @@ -101,14 +101,14 @@ public class SpellCheckerProximityInfo { static { buildProximityIndices(PROXIMITY, INDICES); } - private static int getIndexOf(int characterCode) { + static int getIndexOf(int characterCode) { return computeIndex(characterCode, INDICES); } } - static class Cyrillic { + private static class Cyrillic { final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>(); - final private static int[] PROXIMITY = { + final static int[] PROXIMITY = { // TODO: This table is solely based on the keyboard layout. Consult with Russian // speakers on commonly misspelled words/letters. 'ะน', 'ั', 'ั', 'ั', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, @@ -150,7 +150,7 @@ public class SpellCheckerProximityInfo { static { buildProximityIndices(PROXIMITY, INDICES); } - private static int getIndexOf(int characterCode) { + static int getIndexOf(int characterCode) { return computeIndex(characterCode, INDICES); } } diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 22ee75a24..2da82dc3d 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -32,48 +32,6 @@ namespace latinime { // edit distance funcitons // ///////////////////////////// -#if 0 /* no longer used */ -inline static int editDistance( - int* editDistanceTable, const unsigned short* input, - const int inputLength, const unsigned short* output, const int outputLength) { - // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; - const int li = inputLength + 1; - const int lo = outputLength + 1; - for (int i = 0; i < li; ++i) { - dp[lo * i] = i; - } - for (int i = 0; i < lo; ++i) { - dp[i] = i; - } - - for (int i = 0; i < li - 1; ++i) { - for (int j = 0; j < lo - 1; ++j) { - const uint32_t ci = toBaseLowerCase(input[i]); - const uint32_t co = toBaseLowerCase(output[j]); - const uint16_t cost = (ci == co) ? 0 : 1; - dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, - min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); - if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1]) - && co == toBaseLowerCase(input[i - 1])) { - dp[(i + 1) * lo + (j + 1)] = min( - dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); - } - } - } - - if (DEBUG_EDIT_DISTANCE) { - LOGI("IN = %d, OUT = %d", inputLength, outputLength); - for (int i = 0; i < li; ++i) { - for (int j = 0; j < lo; ++j) { - LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); - } - } - } - return dp[li * lo - 1]; -} -#endif - inline static void initEditDistance(int *editDistanceTable) { for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { editDistanceTable[i] = i; @@ -145,7 +103,7 @@ void Correction::initCorrectionState( void Correction::setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos, - const bool useFullEditDistance) { + const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { // TODO: remove mTransposedPos = transposedPos; mExcessivePos = excessivePos; @@ -158,6 +116,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos, mSpaceProximityPos = spaceProximityPos; mMissingSpacePos = missingSpacePos; mUseFullEditDistance = useFullEditDistance; + mDoAutoCompletion = doAutoCompletion; + mMaxErrors = maxErrors; } void Correction::checkState() { @@ -279,7 +239,9 @@ void Correction::startToTraverseAllNodes() { bool Correction::needsToPrune() const { // TODO: use edit distance here - return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance; + return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance + // Allow one char longer word for missing character + || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); } void Correction::addCharToCurrentWord(const int32_t c) { @@ -311,12 +273,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) { Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); + if (correctionCount > mMaxErrors) { + return UNRELATED; + } + // TODO: Change the limit if we'll allow two or more corrections const bool noCorrectionsHappenedSoFar = correctionCount == 0; const bool canTryCorrection = noCorrectionsHappenedSoFar; int proximityIndex = 0; mDistances[mOutputIndex] = NOT_A_DISTANCE; + // Skip checking this node if (mNeedsToTraverseAllNodes || isQuote(c)) { bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { @@ -341,6 +308,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( return processSkipChar(c, isTerminal, incremented); } + // Check possible corrections. if (mExcessivePos >= 0) { if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { mExcessivePos = mOutputIndex; @@ -391,7 +359,12 @@ Correction::CorrectionType Correction::processCharAndCalcState( } // TODO: Change the limit if we'll allow two or more proximity chars with corrections - const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0; + // Work around: When the mMaxErrors is 1, we only allow just one error + // including proximity correction. + const bool checkProximityChars = (mMaxErrors > 1) + ? (noCorrectionsHappenedSoFar || mProximityCount == 0) + : (noCorrectionsHappenedSoFar && mProximityCount == 0); + ProximityInfo::ProximityType matchedProximityCharId = secondTransposing ? ProximityInfo::EQUIVALENT_CHAR : mProximityInfo->getMatchedProximityId( @@ -931,4 +904,46 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( return totalFreq; } +#if 0 /* no longer used. keep just for reference */ +inline static int editDistance( + int* editDistanceTable, const unsigned short* input, + const int inputLength, const unsigned short* output, const int outputLength) { + // dp[li][lo] dp[a][b] = dp[ a * lo + b] + int* dp = editDistanceTable; + const int li = inputLength + 1; + const int lo = outputLength + 1; + for (int i = 0; i < li; ++i) { + dp[lo * i] = i; + } + for (int i = 0; i < lo; ++i) { + dp[i] = i; + } + + for (int i = 0; i < li - 1; ++i) { + for (int j = 0; j < lo - 1; ++j) { + const uint32_t ci = toBaseLowerCase(input[i]); + const uint32_t co = toBaseLowerCase(output[j]); + const uint16_t cost = (ci == co) ? 0 : 1; + dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, + min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); + if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1]) + && co == toBaseLowerCase(input[i - 1])) { + dp[(i + 1) * lo + (j + 1)] = min( + dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); + } + } + } + + if (DEBUG_EDIT_DISTANCE) { + LOGI("IN = %d, OUT = %d", inputLength, outputLength); + for (int i = 0; i < li; ++i) { + for (int j = 0; j < lo; ++j) { + LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); + } + } + } + return dp[li * lo - 1]; +} +#endif + } // namespace latinime diff --git a/native/src/correction.h b/native/src/correction.h index d4e99f0ce..e55be8dd6 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -44,7 +44,8 @@ public: // TODO: remove void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); + const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, + const bool doAutoCompletion, const int maxErrors); void checkState(); bool initProcessState(const int index); @@ -109,6 +110,7 @@ private: const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; + bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; int mInputLength; @@ -116,6 +118,7 @@ private: int mMissingSpacePos; int mTerminalInputIndex; int mTerminalOutputIndex; + int mMaxErrors; // The following arrays are state buffer. unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; diff --git a/native/src/defines.h b/native/src/defines.h index ea643bf07..e1d2ca351 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -202,6 +202,10 @@ static void dumpWord(const unsigned short* word, const int length) { // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used +// for better performance. +#define SUB_QUEUE_MAX_WORDS 5 + #define MAX_DEPTH_MULTIPLIER 3 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index 55358ec81..e3673d425 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -39,7 +39,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, } } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); - mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); + mWordsPriorityQueuePool = new WordsPriorityQueuePool( + maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength); mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, @@ -48,7 +49,7 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, Dictionary::~Dictionary() { delete mCorrection; - delete mWordsPriorityQueue; + delete mWordsPriorityQueuePool; delete mUnigramDictionary; delete mBigramDictionary; } diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 2a6e4e0d5..52048ecca 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -23,7 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "unigram_dictionary.h" -#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -34,7 +34,7 @@ public: int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) { - return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueue, + return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, mCorrection, xcoordinates, ycoordinates, codes, codesSize, flags, outWords, frequencies); } @@ -81,7 +81,7 @@ private: const bool IS_LATEST_DICT_VERSION; UnigramDictionary *mUnigramDictionary; BigramDictionary *mBigramDictionary; - WordsPriorityQueue *mWordsPriorityQueue; + WordsPriorityQueuePool *mWordsPriorityQueuePool; Correction *mCorrection; }; diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index a2c1f72a1..0db33e7d8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -93,7 +93,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { @@ -110,7 +110,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, - currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, + queuePool); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. @@ -120,7 +121,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, - codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool); return; } } @@ -137,27 +138,28 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction, - queue); + queuePool); } -int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, - Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int codesSize, const int flags, unsigned short *outWords, int *frequencies) { +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, + WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, const int flags, + unsigned short *outWords, int *frequencies) { - WordsPriorityQueue* masterQueue = queue; Correction* masterCorrection = correction; if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags) { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, masterQueue); + codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags, - masterCorrection, masterQueue); + masterCorrection, queuePool); } PROF_START(20); - const int suggestedWordsCount = masterQueue->outputSuggestions(frequencies, outWords); + const int suggestedWordsCount = + queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); @@ -178,11 +180,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorit void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const int flags, Correction *correction, WordsPriorityQueue *queue) { + const int inputLength, const int flags, Correction *correction, + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); PROF_OPEN; PROF_START(0); - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, queue); + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); if (DEBUG_DICT) assert(codesSize == inputLength); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -192,7 +196,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags; // TODO: remove PROF_START(1); - getSuggestionCandidates(useFullEditDistance, inputLength, correction, queue); + getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue); PROF_END(1); PROF_START(2); @@ -216,7 +220,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, LOGI("--- Suggest missing space characters %d", i); } getMissingSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + inputLength, i, proximityInfo, correction, useFullEditDistance, queuePool); } } PROF_END(5); @@ -235,8 +239,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + getMistypedSpaceWords(inputLength, i, proximityInfo, correction, + useFullEditDistance, queuePool); } } } @@ -260,7 +264,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, - -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance); + -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, + true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); @@ -292,20 +297,20 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getMissingSpaceWords( const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } void UnigramDictionary::getMistypedSpaceWords( const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } inline void UnigramDictionary::onTerminal( @@ -320,7 +325,9 @@ inline void UnigramDictionary::onTerminal( void UnigramDictionary::getSplitTwoWordsSuggestion( const int inputLength, ProximityInfo *proximityInfo, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const int spaceProximityPos = correction->getSpaceProximityPos(); const int missingSpacePos = correction->getMissingSpacePos(); if (DEBUG_DICT) { @@ -372,7 +379,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq, queue); + addWord(word, newWordLength, pairFreq, masterQueue); return; } @@ -585,7 +592,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == Correction::UNRELATED) { + } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 0b0126524..ce15cdd8f 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -23,6 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -61,12 +62,16 @@ public: static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + // Error tolerances + static const int DEFAULT_MAX_ERRORS = 2; + static const int MAX_ERRORS_FOR_TWO_WORDS = 1; + UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(const uint16_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, + int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); @@ -76,13 +81,13 @@ private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, - const int flags, Correction *correction, WordsPriorityQueue *queue); + const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); bool isDigraph(const int *codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, - WordsPriorityQueue* queue); + WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue); @@ -90,13 +95,13 @@ private: const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue); void getSplitTwoWordsSuggestion(const int inputLength, ProximityInfo *proximityInfo, - Correction *correction, WordsPriorityQueue *queue); + Correction *correction, WordsPriorityQueuePool *queuePool); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h index 366b1b67a..a4175d3e0 100644 --- a/native/src/words_priority_queue.h +++ b/native/src/words_priority_queue.h @@ -24,7 +24,7 @@ namespace latinime { class WordsPriorityQueue { -private: +public: class SuggestedWord { public: int mScore; @@ -40,31 +40,6 @@ private: } }; - struct wordComparator { - bool operator ()(SuggestedWord * left, SuggestedWord * right) { - return left->mScore > right->mScore; - } - }; - - SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, - int wordLength) { - for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { - if (!mSuggestedWords[i].mUsed) { - mSuggestedWords[i].setParams(score, word, wordLength); - return &mSuggestedWords[i]; - } - } - return 0; - } - - typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, - wordComparator> Suggestions; - Suggestions mSuggestions; - const unsigned int MAX_WORDS; - const unsigned int MAX_WORD_LENGTH; - SuggestedWord* mSuggestedWords; - -public: WordsPriorityQueue(int maxWords, int maxWordLength) : MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( (unsigned int) maxWordLength) { @@ -105,6 +80,13 @@ public: mSuggestions.push(sw); } + SuggestedWord* topAndPop() { + if (mSuggestions.empty()) return 0; + SuggestedWord* sw = mSuggestions.top(); + mSuggestions.pop(); + return sw; + } + int outputSuggestions(int *frequencies, unsigned short *outputChars) { const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; @@ -140,6 +122,30 @@ public: mSuggestions.pop(); } } +private: + struct wordComparator { + bool operator ()(SuggestedWord * left, SuggestedWord * right) { + return left->mScore > right->mScore; + } + }; + + SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + int wordLength) { + for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + if (!mSuggestedWords[i].mUsed) { + mSuggestedWords[i].setParams(score, word, wordLength); + return &mSuggestedWords[i]; + } + } + return 0; + } + + typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, + wordComparator> Suggestions; + Suggestions mSuggestions; + const unsigned int MAX_WORDS; + const unsigned int MAX_WORD_LENGTH; + SuggestedWord* mSuggestedWords; }; } diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h new file mode 100644 index 000000000..d964bfc3b --- /dev/null +++ b/native/src/words_priority_queue_pool.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H +#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H + +#include "words_priority_queue.h" + +namespace latinime { + +class WordsPriorityQueuePool { +public: + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { + mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength); + mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + } + + ~WordsPriorityQueuePool() { + delete mMasterQueue; + } + + WordsPriorityQueue* getMasterQueue() { + return mMasterQueue; + } + // TODO: Come up with more generic pool + WordsPriorityQueue* getSubQueue1() { + return mSubQueue1; + } + WordsPriorityQueue* getSubQueue2() { + return mSubQueue2; + } +private: + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueue1; + WordsPriorityQueue *mSubQueue2; +}; +} + +#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |