diff options
25 files changed, 258 insertions, 156 deletions
diff --git a/java/res/values/attrs.xml b/java/res/values/attrs.xml index c78013f73..e14633a2e 100644 --- a/java/res/values/attrs.xml +++ b/java/res/values/attrs.xml @@ -64,7 +64,7 @@ <attr name="keyShiftedLetterHintRatio" format="float" /> <!-- Horizontal padding of left/right aligned key label to the edge of the key. --> <attr name="keyLabelHorizontalPadding" format="dimension" /> - <!-- Top and right padding of hint letter to the edge of the key.--> + <!-- Right padding of hint letter to the edge of the key.--> <attr name="keyHintLetterPadding" format="dimension" /> <!-- Bottom padding of popup hint letter "..." to the edge of the key.--> <attr name="keyPopupHintLetterPadding" format="dimension" /> diff --git a/java/res/xml/rowkeys_symbols3.xml b/java/res/xml/rowkeys_symbols3.xml index 294e94d31..0518c1954 100644 --- a/java/res/xml/rowkeys_symbols3.xml +++ b/java/res/xml/rowkeys_symbols3.xml @@ -21,10 +21,6 @@ <merge xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin" > - <Key - latin:keyStyle="toMoreSymbolKeyStyle" - latin:keyWidth="15%p" - latin:visualInsetsRight="1%p" /> <!-- U+00A1: "¡" INVERTED EXCLAMATION MARK --> <Key latin:keyLabel="!" @@ -45,8 +41,4 @@ <Key latin:keyLabel="!text/keylabel_for_symbols_question" latin:moreKeys="!text/more_keys_for_symbols_question" /> - <Key - latin:keyStyle="deleteKeyStyle" - latin:keyWidth="fillRight" - latin:visualInsetsLeft="1%p" /> </merge> diff --git a/java/res/xml/rowkeys_symbols_shift1.xml b/java/res/xml/rowkeys_symbols_shift1.xml index 199e4494f..fea8ae337 100644 --- a/java/res/xml/rowkeys_symbols_shift1.xml +++ b/java/res/xml/rowkeys_symbols_shift1.xml @@ -34,10 +34,11 @@ <!-- U+221A: "√" SQUARE ROOT --> <Key latin:keyLabel="√" /> - <!-- U+03C0: "π" GREEK SMALL LETTER PI --> + <!-- U+03C0: "π" GREEK SMALL LETTER PI + U+03A0: "Π" GREEK CAPITAL LETTER PI --> <Key latin:keyLabel="π" - latin:moreKeys="Π" /> + latin:moreKeys="Π" /> <!-- U+00F7: "÷" DIVISION SIGN --> <Key latin:keyLabel="÷" /> diff --git a/java/res/xml/rowkeys_symbols_shift3.xml b/java/res/xml/rowkeys_symbols_shift3.xml index f5db0fee6..a35af218f 100644 --- a/java/res/xml/rowkeys_symbols_shift3.xml +++ b/java/res/xml/rowkeys_symbols_shift3.xml @@ -21,10 +21,6 @@ <merge xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin" > - <Key - latin:keyStyle="backFromMoreSymbolKeyStyle" - latin:keyWidth="15%p" - latin:visualInsetsRight="1%p" /> <!-- U+2122: "™" TRADE MARK SIGN --> <Key latin:keyLabel="™" /> @@ -43,8 +39,4 @@ latin:keyLabel="\\" /> <include latin:keyboardLayout="@xml/keys_less_greater" /> - <Key - latin:keyStyle="deleteKeyStyle" - latin:keyWidth="fillRight" - latin:visualInsetsLeft="1%p" /> </merge> diff --git a/java/res/xml/rows_symbols.xml b/java/res/xml/rows_symbols.xml index 2a73ffbf9..bd1a57e75 100644 --- a/java/res/xml/rows_symbols.xml +++ b/java/res/xml/rows_symbols.xml @@ -40,8 +40,16 @@ <Row latin:keyWidth="10%p" > + <Key + latin:keyStyle="toMoreSymbolKeyStyle" + latin:keyWidth="15%p" + latin:visualInsetsRight="1%p" /> <include latin:keyboardLayout="@xml/rowkeys_symbols3" /> + <Key + latin:keyStyle="deleteKeyStyle" + latin:keyWidth="fillRight" + latin:visualInsetsLeft="1%p" /> </Row> <include latin:keyboardLayout="@xml/row_symbols4" /> diff --git a/java/res/xml/rows_symbols_shift.xml b/java/res/xml/rows_symbols_shift.xml index 0ab9715c1..9c03d90b5 100644 --- a/java/res/xml/rows_symbols_shift.xml +++ b/java/res/xml/rows_symbols_shift.xml @@ -40,8 +40,16 @@ <Row latin:keyWidth="10%p" > + <Key + latin:keyStyle="backFromMoreSymbolKeyStyle" + latin:keyWidth="15%p" + latin:visualInsetsRight="1%p" /> <include latin:keyboardLayout="@xml/rowkeys_symbols_shift3" /> + <Key + latin:keyStyle="deleteKeyStyle" + latin:keyWidth="fillRight" + latin:visualInsetsLeft="1%p" /> </Row> <include latin:keyboardLayout="@xml/row_symbols_shift4" /> diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardView.java b/java/src/com/android/inputmethod/keyboard/KeyboardView.java index d5bd7fda3..4a9135310 100644 --- a/java/src/com/android/inputmethod/keyboard/KeyboardView.java +++ b/java/src/com/android/inputmethod/keyboard/KeyboardView.java @@ -662,10 +662,10 @@ public class KeyboardView extends View implements PointerTracker.DrawingProxy { hintY = -mFontMetrics.top + params.mKeyShiftedLetterHintPadding; paint.setTextAlign(Align.CENTER); } else { // key.hasHintLetter() - // The hint label is placed at top-right corner of the key. Used mainly on phone. + // The hint letter is placed at top-right corner of the key. Used mainly on phone. hintX = keyWidth - params.mKeyHintLetterPadding - getCharWidth(KEY_NUMERIC_HINT_LABEL_REFERENCE_CHAR, paint) / 2; - hintY = -paint.ascent() + params.mKeyHintLetterPadding; + hintY = -paint.ascent(); paint.setTextAlign(Align.CENTER); } canvas.drawText(hint, 0, hint.length(), hintX, hintY, paint); diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java index d0f27a9a8..9dc1786c7 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java @@ -25,7 +25,23 @@ import java.util.HashMap; /** * !!!!! DO NOT EDIT THIS FILE !!!!! - * This file is generated by tools/maketext. + * + * This file is generated by tools/maketext. The base template file is + * tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl + * + * This file must be updated when any text resources in keyboard layout files have been changed. + * These text resources are referred as "!text/<resource_name>" in keyboard XML definitions, + * and should be defined in + * tools/maketext/res/values-<locale>/donottranslate-more-keys.xml + * + * To update this file, please run the following commands. + * $ cd $ANDROID_BUILD_TOP + * $ mmm packages/inputmethods/LatinIME/tools/maketext + * $ maketext -java packages/inputmethods/LatinIME/java/src + * + * The updated source file will be generated to the following path (this file). + * packages/inputmethods/LatinIME/java/src/com/android/inputmethod/keyboard/internal/ + * KeyboardTextsSet.java */ public final class KeyboardTextsSet { // Language to texts map. diff --git a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java index 613c20304..994b917a7 100644 --- a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java +++ b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java @@ -366,6 +366,7 @@ public class AdditionalSubtypeSettings extends PreferenceFragment { final Preference pref = mSubtypePrefGroup.getPreference(i); if (pref instanceof SubtypePreference) { final InputMethodSubtype subtype = ((SubtypePreference)pref).getSubtype(); + if (subtype == null) continue; if (sb.length() > 0) { sb.append(AdditionalSubtype.PREF_SUBTYPE_SEPARATOR); } diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 9429ef411..a644ec0d9 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -83,11 +83,11 @@ public class BinaryDictionary extends Dictionary { private native long openNative(String sourceDir, long dictOffset, long dictSize, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); private native void closeNative(long dict); - private native boolean isValidWordNative(long dict, char[] word, int wordLength); + private native boolean isValidWordNative(long dict, int[] word, int wordLength); private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, boolean useFullEditDistance, char[] outputChars, int[] scores); - private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength, + private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength, int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, int maxWordLength, int maxBigrams); private static native double calcNormalizedScoreNative( @@ -105,7 +105,7 @@ public class BinaryDictionary extends Dictionary { final WordCallback callback) { if (mNativeDict == 0) return; - char[] chars = previousWord.toString().toCharArray(); + int[] codePoints = StringUtils.toCodePointArray(previousWord.toString()); Arrays.fill(mOutputChars_bigrams, (char) 0); Arrays.fill(mBigramScores, 0); @@ -115,8 +115,8 @@ public class BinaryDictionary extends Dictionary { mInputCodes[0] = codes.getCodeAt(0); } - int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize, - mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); + int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes, + codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); if (count > MAX_BIGRAMS) { count = MAX_BIGRAMS; } @@ -200,7 +200,7 @@ public class BinaryDictionary extends Dictionary { @Override public boolean isValidWord(CharSequence word) { if (word == null) return false; - char[] chars = word.toString().toCharArray(); + int[] chars = StringUtils.toCodePointArray(word.toString()); return isValidWordNative(mNativeDict, chars, chars.length); } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index e1978fca1..c6381180c 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1793,6 +1793,24 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen public void pickSuggestionManually(final int index, final CharSequence suggestion, int x, int y) { final SuggestedWords suggestedWords = mSuggestionsView.getSuggestions(); + final InputConnection ic = getCurrentInputConnection(); + if (ic != null) ic.beginBatchEdit(); + + // If this is a punctuation picked from the suggestion strip, pass it to onCodeInput + if (suggestion.length() == 1 && isShowingPunctuationList()) { + // Word separators are suggested before the user inputs something. + // So, LatinImeLogger logs "" as a user's input. + LatinImeLogger.logOnManualSuggestion("", suggestion.toString(), index, suggestedWords); + // Rely on onCodeInput to do the complicated swapping/stripping logic consistently. + if (ProductionFlag.IS_EXPERIMENTAL) { + ResearchLogger.latinIME_punctuationSuggestion(index, suggestion, x, y); + } + final int primaryCode = suggestion.charAt(0); + onCodeInput(primaryCode, + KeyboardActionListener.SUGGESTION_STRIP_COORDINATE, + KeyboardActionListener.SUGGESTION_STRIP_COORDINATE); + return; + } if (SPACE_STATE_PHANTOM == mSpaceState && suggestion.length() > 0) { int firstChar = Character.codePointAt(suggestion, 0); @@ -1810,7 +1828,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } mKeyboardSwitcher.updateShiftState(); resetComposingState(true /* alsoResetLastComposedWord */); - final InputConnection ic = getCurrentInputConnection(); if (ic != null) { final CompletionInfo completionInfo = mApplicationSpecifiedCompletions[index]; ic.commitCompletion(completionInfo); @@ -1822,21 +1839,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen return; } - // If this is a punctuation picked from the suggestion strip, pass it to onCodeInput - if (suggestion.length() == 1 && isShowingPunctuationList()) { - // Word separators are suggested before the user inputs something. - // So, LatinImeLogger logs "" as a user's input. - LatinImeLogger.logOnManualSuggestion("", suggestion.toString(), index, suggestedWords); - // Rely on onCodeInput to do the complicated swapping/stripping logic consistently. - if (ProductionFlag.IS_EXPERIMENTAL) { - ResearchLogger.latinIME_punctuationSuggestion(index, suggestion, x, y); - } - final int primaryCode = suggestion.charAt(0); - onCodeInput(primaryCode, - KeyboardActionListener.SUGGESTION_STRIP_COORDINATE, - KeyboardActionListener.SUGGESTION_STRIP_COORDINATE); - return; - } // We need to log before we commit, because the word composer will store away the user // typed word. final String replacedWord = mWordComposer.getTypedWord().toString(); @@ -1889,6 +1891,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mHandler.postUpdateSuggestions(); } } + if (null != ic) ic.endBatchEdit(); } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 97df98e34..cc98010fb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -174,6 +174,13 @@ public class BinaryDictInputOutput { private static final int MAX_TERMINAL_FREQUENCY = 255; + // Arbitrary limit to how much passes we consider address size compression should + // terminate in. At the time of this writing, our largest dictionary completes + // compression in five passes. + // If the number of passes exceeds this number, makedict bails with an exception on + // suspicion that a bug might be causing an infinite loop. + private static final int MAX_PASSES = 24; + /** * A class grouping utility function for our specific character encoding. */ @@ -510,14 +517,22 @@ public class BinaryDictInputOutput { * Each node stores its tentative address. During dictionary address computing, these * are not final, but they can be used to compute the node size (the node size depends * on the address of the children because the number of bytes necessary to store an - * address depends on its numeric value. + * address depends on its numeric value. The return value indicates whether the node + * contents (as in, any of the addresses stored in the cache fields) have changed with + * respect to their previous value. * * @param node the node to compute the size of. * @param dict the dictionary in which the word/attributes are to be found. + * @return false if none of the cached addresses inside the node changed, true otherwise. */ - private static void computeActualNodeSize(Node node, FusionDictionary dict) { + private static boolean computeActualNodeSize(Node node, FusionDictionary dict) { + boolean changed = false; int size = getGroupCountSize(node); for (CharGroup group : node.mData) { + if (group.mCachedAddress != node.mCachedAddress + size) { + changed = true; + group.mCachedAddress = node.mCachedAddress + size; + } int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group); if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE; if (null != group.mChildren) { @@ -538,7 +553,11 @@ public class BinaryDictInputOutput { group.mCachedSize = groupSize; size += groupSize; } - node.mCachedSize = size; + if (node.mCachedSize != size) { + node.mCachedSize = size; + changed = true; + } + return changed; } /** @@ -594,13 +613,14 @@ public class BinaryDictInputOutput { changesDone = false; for (Node n : flatNodes) { final int oldNodeSize = n.mCachedSize; - computeActualNodeSize(n, dict); + final boolean changed = computeActualNodeSize(n, dict); final int newNodeSize = n.mCachedSize; if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!"); - if (oldNodeSize != newNodeSize) changesDone = true; + changesDone |= changed; } stackNodes(flatNodes); ++passes; + if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); } while (changesDone); final Node lastNode = flatNodes.get(flatNodes.size() - 1); @@ -1122,6 +1142,12 @@ public class BinaryDictInputOutput { } } + // The word cache here is a stopgap bandaid to help the catastrophic performance + // of this method. Since it performs direct, unbuffered random access to the file and + // may be called hundreds of thousands of times, the resulting performance is not + // reasonable without some kind of cache. Thus: + // TODO: perform buffered I/O here and in other places in the code. + private static TreeMap<Integer, String> wordCache = new TreeMap<Integer, String>(); /** * Finds, as a string, the word at the address passed as an argument. * @@ -1131,8 +1157,10 @@ public class BinaryDictInputOutput { * @return the word, as a string. * @throws IOException if the file can't be read. */ - private static String getWordAtAddress(RandomAccessFile source, long headerSize, + private static String getWordAtAddress(final RandomAccessFile source, final long headerSize, int address) throws IOException { + final String cachedString = wordCache.get(address); + if (null != cachedString) return cachedString; final long originalPointer = source.getFilePointer(); source.seek(headerSize); final int count = readCharGroupCount(source); @@ -1171,6 +1199,7 @@ public class BinaryDictInputOutput { } } source.seek(originalPointer); + wordCache.put(address, result); return result; } diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java index a17371396..26a9415c4 100644 --- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java +++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionsView.java @@ -171,7 +171,7 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, public boolean mMoreSuggestionsAvailable; - public final TextView mWordToSaveView; + private final TextView mWordToSaveView; private final TextView mLeftwardsArrowView; private final TextView mHintToSaveView; @@ -477,7 +477,7 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, } public void layoutAddToDictionaryHint(CharSequence word, ViewGroup stripView, - int stripWidth, CharSequence hintText) { + int stripWidth, CharSequence hintText, OnClickListener listener) { final int width = stripWidth - mDividerWidth - mPadding * 2; final TextView wordView = mWordToSaveView; @@ -508,6 +508,18 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, stripView.addView(hintView); setLayoutWeight( hintView, 1.0f - mCenterSuggestionWeight, ViewGroup.LayoutParams.MATCH_PARENT); + + wordView.setOnClickListener(listener); + leftArrowView.setOnClickListener(listener); + hintView.setOnClickListener(listener); + } + + public CharSequence getAddToDictionaryWord() { + return (CharSequence)mWordToSaveView.getTag(); + } + + public boolean isAddToDictionaryShowing(View v) { + return v == mWordToSaveView || v == mHintToSaveView || v == mLeftwardsArrowView; } private static void setLayoutWeight(View v, float weight, int height) { @@ -620,7 +632,6 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, } mParams = new SuggestionsViewParams(context, attrs, defStyle, mWords, mDividers, mInfos); - mParams.mWordToSaveView.setOnClickListener(this); mMoreSuggestionsContainer = inflater.inflate(R.layout.more_suggestions, null); mMoreSuggestionsView = (MoreSuggestionsView)mMoreSuggestionsContainer @@ -676,12 +687,12 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, public boolean isShowingAddToDictionaryHint() { return mSuggestionsStrip.getChildCount() > 0 - && mSuggestionsStrip.getChildAt(0) == mParams.mWordToSaveView; + && mParams.isAddToDictionaryShowing(mSuggestionsStrip.getChildAt(0)); } public void showAddToDictionaryHint(CharSequence word, CharSequence hintText) { clear(); - mParams.layoutAddToDictionaryHint(word, mSuggestionsStrip, getWidth(), hintText); + mParams.layoutAddToDictionaryHint(word, mSuggestionsStrip, getWidth(), hintText, this); } public boolean dismissAddToDictionaryHint() { @@ -851,8 +862,8 @@ public class SuggestionsView extends RelativeLayout implements OnClickListener, @Override public void onClick(View view) { - if (view == mParams.mWordToSaveView) { - addToDictionary((CharSequence)view.getTag()); + if (mParams.isAddToDictionaryShowing(view)) { + addToDictionary(mParams.getAddToDictionaryWord()); clear(); return; } diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 2ef72e1e8..3e72ce684 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -153,30 +153,30 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, } static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict, - jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, + jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return 0; - jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0); + jint *prevWord = env->GetIntArrayElements(prevWordArray, 0); int *inputCodes = env->GetIntArrayElements(inputArray, 0); jchar *outputChars = env->GetCharArrayElements(outputArray, 0); int *frequencies = env->GetIntArrayElements(frequencyArray, 0); - int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes, + int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes, inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams); env->ReleaseIntArrayElements(frequencyArray, frequencies, 0); env->ReleaseCharArrayElements(outputArray, outputChars, 0); env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT); - env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT); + env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT); return count; } static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict, - jcharArray wordArray, jint wordLength) { + jintArray wordArray, jint wordLength) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return (jboolean) false; - jchar *word = env->GetCharArrayElements(wordArray, 0); - jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength); - env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT); + jint *word = env->GetIntArrayElements(wordArray, 0); + jboolean result = dictionary->isValidWord(word, wordLength); + env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT); return result; } @@ -236,8 +236,8 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, - {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, - {"getBigramsNative", "(J[CI[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, + {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)D", (void*)latinime_BinaryDictionary_calcNormalizedScore}, {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance} diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 320b0af68..927381fdb 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -96,7 +96,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes, +int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes, int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength, int maxBigrams) { // TODO: remove unused arguments, and refrain from storing stuff in members of this class @@ -134,7 +134,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i // Returns a pointer to the start of the bigram list. // If the word is not found or has no bigrams, this function returns 0. int BigramDictionary::getBigramListForWord(const uint8_t* const root, - const unsigned short *prevWord, const int prevWordLength) { + const int32_t *prevWord, const int prevWordLength) { int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength); if (NOT_VALID_WORD == pos) return 0; diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 1612131c4..07e47f059 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -25,10 +25,10 @@ class Dictionary; class BigramDictionary { public: BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary); - int getBigrams(unsigned short *word, int length, int *codes, int codesSize, + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams); int getBigramListForWord(const uint8_t* const root, - const unsigned short *prevWord, const int prevWordLength); + const int32_t *prevWord, const int prevWordLength); ~BigramDictionary(); private: bool addWordBigram(unsigned short *word, int length, int frequency); diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index f59302460..d5d67c108 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -62,10 +62,11 @@ class BinaryFormat { static bool hasChildrenInFlags(const uint8_t flags); static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags, int *pos); - static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord, + static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord, const int length); static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, uint16_t* outWord); + static int getProbability(const int bigramListPosition, const int unigramFreq); // Flags for special processing // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or @@ -304,7 +305,7 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, - const uint16_t* const inWord, const int length) { + const int32_t* const inWord, const int length) { int pos = 0; int wordPos = 0; @@ -313,7 +314,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, // there was no match (or we would have found it). if (wordPos > length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const uint16_t wChar = inWord[wordPos]; + const int32_t wChar = inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. @@ -517,6 +518,14 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a return 0; } +// This should probably return a probability in log space. +inline int BinaryFormat::getProbability(const int bigramListPosition, const int unigramFreq) { + // TODO: use the bigram list position to get the bigram probability. If the bigram + // is not found, use the unigram frequency. + // TODO: if the unigram frequency is used, compute the actual probability + return unigramFreq; +} + } // namespace latinime #endif // LATINIME_BINARY_FORMAT_H diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 087219ed4..376e9a10e 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -165,28 +165,28 @@ int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wo wordCount, this, isSpaceProximity, word); } -int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { - return getFinalFreqInternal(freq, word, wordLength, mInputLength); +int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { + return getFinalProbabilityInternal(probability, word, wordLength, mInputLength); } -int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength, - const int inputLength) { - return getFinalFreqInternal(freq, word, wordLength, inputLength); +int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, + int *wordLength, const int inputLength) { + return getFinalProbabilityInternal(probability, word, wordLength, inputLength); } -int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength, - const int inputLength) { +int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, + int *wordLength, const int inputLength) { const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; if (outputIndex < MIN_SUGGEST_DEPTH) { - return NOT_A_FREQUENCY; + return NOT_A_PROBABILITY; } *word = mWord; - int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq( - inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength); - return finalFreq; + int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( + inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength); + return finalProbability; } bool Correction::initProcessState(const int outputIndex) { @@ -649,8 +649,8 @@ inline static bool isUpperCase(unsigned short c) { ////////////////////// /* static */ -int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, - const int freq, int* editDistanceTable, const Correction* correction, +int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, + const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index ee55c9604..1b4e4bf4e 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -132,9 +132,9 @@ class Correction { int getFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word); - int getFinalFreq(const int freq, unsigned short **word, int* wordLength); - int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength, - const int inputLength); + int getFinalProbability(const int probability, unsigned short **word, int* wordLength); + int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, + int* wordLength, const int inputLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); @@ -156,8 +156,8 @@ class Correction { class RankingAlgorithm { public: - static int calculateFinalFreq(const int inputIndex, const int depth, - const int freq, int *editDistanceTable, const Correction* correction, + static int calculateFinalProbability(const int inputIndex, const int depth, + const int probability, int *editDistanceTable, const Correction* correction, const int inputLength); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction* correction, const bool isSpaceProximity, @@ -182,8 +182,8 @@ class Correction { const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); - inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength, - const int inputLength); + inline int getFinalProbabilityInternal(const int probability, unsigned short **word, + int* wordLength, const int inputLength); const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index e882c3714..c99f8a8b2 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -172,7 +172,7 @@ static inline void prof_out(void) { #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3 #define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4 #define NOT_AN_INDEX -1 -#define NOT_A_FREQUENCY -1 +#define NOT_A_PROBABILITY -1 #define KEYCODE_SPACE ' ' diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 90ec207f0..9dc207223 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -54,7 +54,7 @@ Dictionary::~Dictionary() { delete mBigramDictionary; } -bool Dictionary::isValidWord(unsigned short *word, int length) { +bool Dictionary::isValidWord(const int32_t *word, int length) { return mUnigramDictionary->isValidWord(word, length); } diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 66a5c2150..5b9ddb3e9 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -35,18 +35,22 @@ class Dictionary { int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, bool useFullEditDistance, unsigned short *outWords, int *frequencies) { + // bigramListPosition is, as an int, the offset of the bigram list in the file. + // If none, it's zero. + // TODO: get this from the bigram dictionary instance + const int bigramListPosition = 0; return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, - mCorrection, xcoordinates, ycoordinates, codes, - codesSize, useFullEditDistance, outWords, frequencies); + mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition, + useFullEditDistance, outWords, frequencies); } - int getBigrams(unsigned short *word, int length, int *codes, int codesSize, + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) { return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, maxWordLength, maxBigrams); } - bool isValidWord(unsigned short *word, int length); + bool isValidWord(const int32_t *word, int length); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ab8570e6f..0c759d438 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -98,7 +98,8 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, - const int codesBufferSize, const bool useFullEditDistance, const int *codesSrc, + const int codesBufferSize, const int bigramListPosition, + const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, const digraph_t* const digraphs, const unsigned int digraphsSize) { @@ -127,8 +128,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit replacementCodePoint; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, - useFullEditDistance, codesSrc + i + 1, codesRemain - i - 1, - currentDepth + 1, codesDest + i, correction, + bigramListPosition, useFullEditDistance, codesSrc + i + 1, + codesRemain - i - 1, currentDepth + 1, codesDest + i, correction, queuePool, digraphs, digraphsSize); // Copy the second char of the digraph in place, then continue processing on @@ -137,9 +138,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, - useFullEditDistance, codesSrc + i, codesRemain - i, currentDepth + 1, - codesDest + i, correction, queuePool, - digraphs, digraphsSize); + bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i, + currentDepth + 1, codesDest + i, correction, queuePool, digraphs, + digraphsSize); return; } } @@ -160,14 +161,16 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit } getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer, - startIndex + codesRemain, useFullEditDistance, correction, + startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction, queuePool); } +// bigramListPosition is the offset in the file to the list of bigrams for the previous word. int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies) { + const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords, + int *frequencies) { queuePool->clearAll(); Correction* masterCorrection = correction; @@ -177,8 +180,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, - codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition, + useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, GERMAN_UMLAUT_DIGRAPHS, sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0])); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { @@ -186,13 +189,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, - codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition, + useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, FRENCH_LIGATURES_DIGRAPHS, sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0])); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, - useFullEditDistance, masterCorrection, queuePool); + bigramListPosition, useFullEditDistance, masterCorrection, queuePool); } PROF_START(20); @@ -225,16 +228,16 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool) { + const int inputLength, const int bigramListPosition, const bool useFullEditDistance, + Correction *correction, WordsPriorityQueuePool *queuePool) { PROF_OPEN; PROF_START(0); PROF_END(0); PROF_START(1); - getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, - inputLength, correction, queuePool); + getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition, + useFullEditDistance, inputLength, correction, queuePool); PROF_END(1); PROF_START(2); @@ -305,15 +308,16 @@ static const char SPACE = ' '; void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, Correction *correction, - WordsPriorityQueuePool *queuePool) { + const int bigramListPosition, const bool useFullEditDistance, const int inputLength, + Correction *correction, WordsPriorityQueuePool *queuePool) { initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, - true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); + getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction, + queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, - const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, + const int inputLength, const int bigramListPosition, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, @@ -333,8 +337,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - correction, &childCount, &firstChildPos, &siblingPos, queuePool, - currentWordIndex); + bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos, + queuePool, currentWordIndex); // Update next sibling pos correction->setTreeSiblingPos(outputIndex, siblingPos); @@ -349,7 +353,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, } } -inline void UnigramDictionary::onTerminal(const int freq, +inline void UnigramDictionary::onTerminal(const int probability, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, const int currentWordIndex) { @@ -361,26 +365,28 @@ inline void UnigramDictionary::onTerminal(const int freq, if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); - if (finalFreq != NOT_A_FREQUENCY) { - addWord(wordPointer, wordLength, finalFreq, masterQueue); + const int finalProbability = + correction->getFinalProbability(probability, &wordPointer, &wordLength); + if (finalProbability != NOT_A_PROBABILITY) { + addWord(wordPointer, wordLength, finalProbability, masterQueue); - const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0; + const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); while (iterator.hasNextShortcutTarget()) { // TODO: addWord only supports weak ordering, meaning we have no means // to control the order of the shortcuts relative to one another or to the word. - // We need to either modulate the frequency of each shortcut according - // to its own shortcut frequency or to make the queue + // We need to either modulate the probability of each shortcut according + // to its own shortcut probability or to make the queue // so that the insert order is protected inside the queue for words // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); - addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue); + addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability, + masterQueue); } } } @@ -393,9 +399,9 @@ inline void UnigramDictionary::onTerminal(const int freq, if (!subQueue) { return; } - const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, - inputIndex); - addWord(wordPointer, wordLength, finalFreq, subQueue); + const int finalProbability = correction->getFinalProbabilityForSubQueue( + probability, &wordPointer, &wordLength, inputIndex); + addWord(wordPointer, wordLength, finalProbability, subQueue); } } @@ -424,8 +430,10 @@ bool UnigramDictionary::getSubStringSuggestion( initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], codes + offset, inputWordLength, correction); queuePool->clearSubQueue(currentWordIndex); - getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, - queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); + // TODO: pass the bigram list for substring suggestion + getSuggestionCandidates(useFullEditDistance, inputWordLength, + 0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */, + MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); if (DEBUG_DICT) { if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); @@ -730,7 +738,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const { +bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const { return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length); } @@ -755,7 +763,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, - Correction *correction, int *newCount, + const int bigramListPosition, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) { if (DEBUG_DICT) { @@ -834,11 +842,14 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, if (isTerminalNode) { // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. - const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); + const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); - onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal, + // The bigramListPosition is the offset in the file of the bigrams for the previous word, + // or zero if we don't know of any bigrams for it. + const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq); + onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal, currentWordIndex); // If there are more chars in this node, then this virtual node has children. diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 4479cd94e..0cc59bac8 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -71,37 +71,38 @@ class UnigramDictionary { UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - bool isValidWord(const uint16_t* const inWord, const int length) const; + bool isValidWord(const int32_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, - Correction *correction, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, + Correction *correction, const int *xcoordinates, const int *ycoordinates, + const int *codes, const int codesSize, const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords, int *frequencies); virtual ~UnigramDictionary(); private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, - const bool useFullEditDistance, Correction *correction, + const int bigramListPosition, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool); int getDigraphReplacement(const int *codes, const int i, const int codesSize, const digraph_t* const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, - int *xCoordinatesBuffer, int *yCoordinatesBuffer, - const int codesBufferSize, const bool useFullEditDistance, const int* codesSrc, + int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, + const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs, const unsigned int digraphsSize); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const bool useFullEditDistance, - const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); - void getSuggestionCandidates( + const int *ycoordinates, const int *codes, const int bigramListPosition, const bool useFullEditDistance, const int inputLength, Correction *correction, - WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors, - const int currentWordIndex); + WordsPriorityQueuePool* queuePool); + void getSuggestionCandidates( + const bool useFullEditDistance, const int inputLength, const int bigramListPosition, + Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, + const int maxErrors, const int currentWordIndex); void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, @@ -113,9 +114,9 @@ class UnigramDictionary { bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character - bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, - int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, - const int currentWordIndex); + bool processCurrentNode(const int initialPos, const int bigramListPosition, + Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, + WordsPriorityQueuePool *queuePool, const int currentWordIndex); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, diff --git a/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl b/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl index 5a193388a..8df94c1aa 100644 --- a/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl +++ b/tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl @@ -25,7 +25,23 @@ import java.util.HashMap; /** * !!!!! DO NOT EDIT THIS FILE !!!!! - * This file is generated by tools/maketext. + * + * This file is generated by tools/maketext. The base template file is + * tools/maketext/res/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.tmpl + * + * This file must be updated when any text resources in keyboard layout files have been changed. + * These text resources are referred as "!text/<resource_name>" in keyboard XML definitions, + * and should be defined in + * tools/maketext/res/values-<locale>/donottranslate-more-keys.xml + * + * To update this file, please run the following commands. + * $ cd $ANDROID_BUILD_TOP + * $ mmm packages/inputmethods/LatinIME/tools/maketext + * $ maketext -java packages/inputmethods/LatinIME/java/src + * + * The updated source file will be generated to the following path (this file). + * packages/inputmethods/LatinIME/java/src/com/android/inputmethod/keyboard/internal/ + * KeyboardTextsSet.java */ public final class KeyboardTextsSet { // Language to texts map. |