11 files changed, 232 insertions, 130 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index e18aee6ff..cb1069cfb 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -84,7 +84,7 @@ public class BinaryDictionary extends Dictionary {
     private native long openNative(String sourceDir, long dictOffset, long dictSize,
             int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
     private native void closeNative(long dict);
-    private native boolean isValidWordNative(long dict, int[] word, int wordLength);
+    private native int getFrequencyNative(long dict, int[] word, int wordLength);
     private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
     private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
             int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
@@ -203,7 +203,8 @@ public class BinaryDictionary extends Dictionary {
     public boolean isValidWord(CharSequence word) {
         if (word == null) return false;
         int[] chars = StringUtils.toCodePointArray(word.toString());
-        return isValidWordNative(mNativeDict, chars, chars.length);
+        final int freq = getFrequencyNative(mNativeDict, chars, chars.length);
+        return freq >= 0;
     }
 
     // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
index fa3d1be11..d5163f2a1 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
@@ -30,8 +30,6 @@ import android.util.Log;
 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
 
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 
 /**
  * Locally gathers stats about the words user types and various other signals like auto-correction
@@ -39,6 +37,7 @@ import java.util.Iterator;
  */
 public class UserHistoryDictionary extends ExpandableDictionary {
     private static final String TAG = "UserHistoryDictionary";
+    public static final boolean DBG_SAVE_RESTORE = false;
 
     /** Any pair being typed or picked */
     private static final int FREQUENCY_FOR_TYPED = 2;
@@ -78,7 +77,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
     /** Locale for which this auto dictionary is storing words */
     private String mLocale;
 
-    private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
+    private UserHistoryDictionaryBigramList mBigramList =
+            new UserHistoryDictionaryBigramList();
     private final Object mPendingWritesLock = new Object();
     private static volatile boolean sUpdatingDB = false;
     private final SharedPreferences mPrefs;
@@ -99,35 +99,6 @@ public class UserHistoryDictionary extends ExpandableDictionary {
 
     private static DatabaseHelper sOpenHelper = null;
 
-    private static class Bigram {
-        public final String mWord1;
-        public final String mWord2;
-
-        Bigram(String word1, String word2) {
-            this.mWord1 = word1;
-            this.mWord2 = word2;
-        }
-
-        @Override
-        public boolean equals(Object bigram) {
-            if (!(bigram instanceof Bigram)) {
-                return false;
-            }
-            final Bigram bigram2 = (Bigram) bigram;
-            final boolean eq1 =
-                    mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1);
-            if (!eq1) {
-                return false;
-            }
-            return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2);
-        }
-
-        @Override
-        public int hashCode() {
-            return (mWord1 + " " + mWord2).hashCode();
-        }
-    }
-
     public void setDatabaseMax(int maxHistoryBigram) {
         sMaxHistoryBigrams = maxHistoryBigram;
     }
@@ -190,20 +161,17 @@ public class UserHistoryDictionary extends ExpandableDictionary {
             freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
         }
         synchronized (mPendingWritesLock) {
-            final Bigram bi = new Bigram(word1, word2);
-            if (!mPendingWrites.contains(bi)) {
-                mPendingWrites.add(bi);
-            }
+            mBigramList.addBigram(word1, word2);
         }
 
         return freq;
     }
 
     public boolean cancelAddingUserHistory(String word1, String word2) {
-        final Bigram bi = new Bigram(word1, word2);
-        if (mPendingWrites.contains(bi)) {
-            mPendingWrites.remove(bi);
-            return super.removeBigram(word1, word2);
+        synchronized (mPendingWritesLock) {
+            if (mBigramList.removeBigram(word1, word2)) {
+                return super.removeBigram(word1, word2);
+            }
         }
         return false;
     }
@@ -214,11 +182,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
     private void flushPendingWrites() {
         synchronized (mPendingWritesLock) {
             // Nothing pending? Return
-            if (mPendingWrites.isEmpty()) return;
+            if (mBigramList.isEmpty()) return;
             // Create a background thread to write the pending entries
-            new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute();
+            new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute();
             // Create a new map for writing new entries into while the old one is written to db
-            mPendingWrites = new HashSet<Bigram>();
+            mBigramList = new UserHistoryDictionaryBigramList();
         }
     }
 
@@ -251,6 +219,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                     final String word1 = cursor.getString(word1Index);
                     final String word2 = cursor.getString(word2Index);
                     final int frequency = cursor.getInt(frequencyIndex);
+                    if (DBG_SAVE_RESTORE) {
+                        Log.d(TAG, "--- Load user history: " + word1 + ", " + word2);
+                    }
                     // Safeguard against adding really long words. Stack may overflow due
                     // to recursive lookup
                     if (null == word1) {
@@ -259,8 +230,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                             && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
                         super.setBigramAndGetFrequency(
                                 word1, word2, new ForgettingCurveParams(frequency, now, last));
-                        // TODO: optimize
-                        mPendingWrites.add(new Bigram(word1, word2));
+                    }
+                    synchronized(mPendingWritesLock) {
+                        mBigramList.addBigram(word1, word2);
                     }
                     cursor.moveToNext();
                 }
@@ -339,14 +311,15 @@ public class UserHistoryDictionary extends ExpandableDictionary {
      * the in-memory trie.
      */
     private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
-        private final HashSet<Bigram> mMap;
+        private final UserHistoryDictionaryBigramList mBigramList;
         private final DatabaseHelper mDbHelper;
         private final String mLocale;
         private final UserHistoryDictionary mUserHistoryDictionary;
 
-        public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites,
+        public UpdateDbTask(
+                DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites,
                 String locale, UserHistoryDictionary dict) {
-            mMap = pendingWrites;
+            mBigramList = pendingWrites;
             mLocale = locale;
             mDbHelper = openHelper;
             mUserHistoryDictionary = dict;
@@ -401,67 +374,71 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                 return null;
             }
             db.execSQL("PRAGMA foreign_keys = ON;");
+            final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams;
+
             // Write all the entries to the db
-            final Iterator<Bigram> iterator = mMap.iterator();
-            while (iterator.hasNext()) {
-                // TODO: this process of making a text search for each pair each time
-                // is terribly inefficient. Optimize this.
-                final Bigram bi = iterator.next();
-
-                // find pair id
-                Cursor c = null;
-                try {
-                    if (null != bi.mWord1) {
-                        c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
-                                MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
-                                        + MAIN_COLUMN_LOCALE + "=?",
-                                        new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null,
-                                        null);
-                    } else {
-                        c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
-                                MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND "
-                                        + MAIN_COLUMN_LOCALE + "=?",
-                                        new String[] { bi.mWord2, mLocale }, null, null, null);
-                    }
+            for (String word1 : mBigramList.keySet()) {
+                for (String word2 : mBigramList.getBigrams(word1)) {
+                    // TODO: this process of making a text search for each pair each time
+                    // is terribly inefficient. Optimize this.
+                    // find pair id
+                    Cursor c = null;
+                    try {
+                        if (null != word1) {
+                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
+                                    MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
+                                            + MAIN_COLUMN_LOCALE + "=?",
+                                            new String[] { word1, word2, mLocale }, null, null,
+                                            null);
+                        } else {
+                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
+                                    MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2
+                                            + "=? AND " + MAIN_COLUMN_LOCALE + "=?",
+                                            new String[] { word2, mLocale }, null, null, null);
+                        }
 
-                    final int pairId;
-                    if (c.moveToFirst()) {
-                        // existing pair
-                        pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
-                        db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
-                                new String[] { Integer.toString(pairId) });
-                    } else {
-                        // new pair
-                        Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
-                                getContentValues(bi.mWord1, bi.mWord2, mLocale));
-                        pairId = pairIdLong.intValue();
-                    }
-                    // insert new frequency
-                    final int freq;
-                    if (bi.mWord1 == null) {
-                        freq = FREQUENCY_FOR_TYPED;
-                    } else {
-                        final NextWord nw = mUserHistoryDictionary.getBigramWord(
-                                bi.mWord1, bi.mWord2);
-                        if (nw != null) {
-                            final int tempFreq = nw.getFcValue();
-                            // TODO: Check whether the word is valid or not
-                            if (UserHistoryForgettingCurveUtils.needsToSave(
-                                    (byte)tempFreq, false)) {
-                                freq = tempFreq;
+                        final int pairId;
+                        if (c.moveToFirst()) {
+                            // existing pair
+                            pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
+                            db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
+                                    new String[] { Integer.toString(pairId) });
+                        } else {
+                            // new pair
+                            Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
+                                    getContentValues(word1, word2, mLocale));
+                            pairId = pairIdLong.intValue();
+                        }
+                        // insert new frequency
+                        final int freq;
+                        if (word1 == null) {
+                            freq = FREQUENCY_FOR_TYPED;
+                        } else {
+                            final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
+                            if (nw != null) {
+                                final int tempFreq = nw.getFcValue();
+                                // TODO: Check whether the word is valid or not
+                                if (UserHistoryForgettingCurveUtils.needsToSave(
+                                        (byte)tempFreq, false, addLevel0Bigram)) {
+                                    freq = tempFreq;
+                                } else {
+                                    freq = -1;
+                                }
                             } else {
                                 freq = -1;
                             }
-                        } else {
-                            freq = -1;
                         }
-                    }
-                    if (freq > 0) {
-                        db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq));
-                    }
-                } finally {
-                    if (c != null) {
-                        c.close();
+                        if (freq > 0) {
+                            if (DBG_SAVE_RESTORE) {
+                                Log.d(TAG, "--- Save user history: " + word1 + ", " + word2);
+                            }
+                            db.insert(FREQ_TABLE_NAME, null,
+                                    getFrequencyContentValues(pairId, freq));
+                        }
+                    } finally {
+                        if (c != null) {
+                            c.close();
+                        }
                     }
                 }
             }
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java
new file mode 100644
index 000000000..409f921ff
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.util.Log;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A store of bigrams which will be updated when the user history dictionary is closed.
+ * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
+ * bigrams when we write to the SQL DB.
+ */
+public class UserHistoryDictionaryBigramList {
+    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
+    private static final HashSet<String> EMPTY_STRING_SET = new HashSet<String>();
+    private final HashMap<String, HashSet<String>> mBigramMap =
+            new HashMap<String, HashSet<String>>();
+    private int mSize = 0;
+
+    public void evictAll() {
+        mSize = 0;
+        mBigramMap.clear();
+    }
+
+    public void addBigram(String word1, String word2) {
+        if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
+            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2);
+        }
+        final HashSet<String> set;
+        if (mBigramMap.containsKey(word1)) {
+            set = mBigramMap.get(word1);
+        } else {
+            set = new HashSet<String>();
+            mBigramMap.put(word1, set);
+        }
+        if (!set.contains(word2)) {
+            ++mSize;
+            set.add(word2);
+        }
+    }
+
+    public int size() {
+        return mSize;
+    }
+
+    public boolean isEmpty() {
+        return mBigramMap.isEmpty();
+    }
+
+    public Set<String> keySet() {
+        return mBigramMap.keySet();
+    }
+
+    public HashSet<String> getBigrams(String word1) {
+        if (!mBigramMap.containsKey(word1)) {
+            return EMPTY_STRING_SET;
+        } else {
+            return mBigramMap.get(word1);
+        }
+    }
+
+    public boolean removeBigram(String word1, String word2) {
+        final HashSet<String> set = getBigrams(word1);
+        if (set.isEmpty()) {
+            return false;
+        }
+        if (set.contains(word2)) {
+            set.remove(word2);
+            --mSize;
+            return true;
+        }
+        return false;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
index f30fee23e..feb1d0029 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java
@@ -162,10 +162,15 @@ public class UserHistoryForgettingCurveUtils {
 
     // TODO: isValid should be false for a word whose frequency is 0,
     // or that is not in the dictionary.
-    public static boolean needsToSave(byte fc, boolean isValid) {
+    /**
+     * Check whether we should save the bigram to the SQL DB or not
+     */
+    public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) {
         int level = fcToLevel(fc);
-        if (isValid && level == 0) {
-            return false;
+        if (level == 0) {
+            if (isValid || !addLevel0Bigram) {
+                return false;
+            }
         }
         final int elapsedTime = fcToElapsedTime(fc);
         return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index f130062a1..d10dc962e 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo
     return count;
 }
 
-static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
+static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict,
         jintArray wordArray, jint wordLength) {
     Dictionary *dictionary = (Dictionary*)dict;
-    if (!dictionary) return (jboolean) false;
+    if (!dictionary) return NOT_A_PROBABILITY; // not 0: Java treats freq >= 0 as a valid word
     jint *word = env->GetIntArrayElements(wordArray, 0);
-    jboolean result = dictionary->isValidWord(word, wordLength);
+    jint result = dictionary->getFrequency(word, wordLength);
     env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
     return result;
 }
@@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = {
     {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
     {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
             (void*)latinime_BinaryDictionary_getSuggestions},
-    {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+    {"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency},
     {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
     {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
     {"calcNormalizedScoreNative", "([CI[CII)F",
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index ac2a26172..eb4bf8d1a 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -117,14 +117,22 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
     do {
         bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
         uint16_t bigramBuffer[MAX_WORD_LENGTH];
+        int unigramFreq;
         const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
                 &pos);
         const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
-                bigramBuffer);
+                bigramBuffer, &unigramFreq);
 
         // codesSize == 0 means we are trying to find bigram predictions.
         if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
-            const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+            const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+            // Due to space constraints, the frequency for bigrams is approximate - the lower the
+            // unigram frequency, the worse the precision. The theoretical maximum error in
+            // resulting frequency is 8 - although in practice it's never bigger than 3 or 4
+            // in very bad cases. This means that sometimes, we'll see some bigrams swapped
+            // here, but it can't get too bad.
+            const int frequency =
+                    BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
             if (addWordBigram(bigramBuffer, length, frequency)) {
                 ++bigramCount;
             }
@@ -149,8 +157,8 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
     } else {
         pos = BinaryFormat::skipOtherCharacters(root, pos);
     }
-    pos = BinaryFormat::skipChildrenPosition(flags, pos);
     pos = BinaryFormat::skipFrequency(flags, pos);
+    pos = BinaryFormat::skipChildrenPosition(flags, pos);
     pos = BinaryFormat::skipShortcuts(root, flags, pos);
     return pos;
 }
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 40f197619..51bf8ebbc 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -66,7 +66,8 @@ class BinaryFormat {
     static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
             const int length);
     static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
-            uint16_t* outWord);
+            uint16_t* outWord, int* outUnigramFrequency);
+    static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
     static int getProbability(const int position, const std::map<int, int> *bigramMap,
             const uint8_t *bigramFilter, const int unigramFreq);
 
@@ -390,10 +391,11 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
  * address: the byte position of the last chargroup of the word we are searching for (this is
  *   what is stored as the "bigram address" in each bigram)
  * outword: an array to write the found word, with MAX_WORD_LENGTH size.
+ * outUnigramFrequency: a pointer to an int to write the frequency into.
  * Return value : the length of the word, of 0 if the word was not found.
  */
 inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
-        const int maxDepth, uint16_t* outWord) {
+        const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) {
     int pos = 0;
     int wordPos = 0;
 
@@ -421,11 +423,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
                     // We count chars in order to avoid infinite loops if the file is broken or
                     // if there is some other bug
                     int charCount = maxDepth;
-                    while (-1 != nextChar && --charCount > 0) {
+                    while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
                         outWord[++wordPos] = nextChar;
                         nextChar = getCharCodeAndForwardPointer(root, &pos);
                     }
                 }
+                *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
                 return ++wordPos;
             }
             // We need to skip past this char group, so skip any remaining chars after the
@@ -529,6 +532,16 @@ static inline int backoff(const int unigramFreq) {
     // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
 }
 
+inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) {
+    // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
+    // unigram frequency to be the median value of the 17th step from the top. A value of
+    // 0 for the bigram frequency represents the middle of the 16th step from the top,
+    // while a value of 15 represents the middle of the top step.
+    // See makedict.BinaryDictInputOutput for details.
+    const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
+    return (int)(unigramFreq + (bigramFreq + 1) * stepSize);
+}
+
 // This returns a probability in log space.
 inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
         const uint8_t *bigramFilter, const int unigramFreq) {
@@ -537,13 +550,7 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
     const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
     if (bigramFreqIt != bigramMap->end()) {
         const int bigramFreq = bigramFreqIt->second;
-        // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
-        // unigram frequency to be the median value of the 17th step from the top. A value of
-        // 0 for the bigram frequency represents the middle of the 16th step from the top,
-        // while a value of 15 represents the middle of the top step.
-        // See makedict.BinaryDictInputOutput for details.
-        const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
-        return (int)(unigramFreq + bigramFreq * stepSize);
+        return computeFrequencyForBigram(unigramFreq, bigramFreq);
     } else {
         return backoff(unigramFreq);
     }
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 65d0f73a3..1fb02478b 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -55,8 +55,8 @@ Dictionary::~Dictionary() {
     delete mBigramDictionary;
 }
 
-bool Dictionary::isValidWord(const int32_t *word, int length) {
-    return mUnigramDictionary->isValidWord(word, length);
+int Dictionary::getFrequency(const int32_t *word, int length) {
+    return mUnigramDictionary->getFrequency(word, length);
 }
 
 bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 87891ee4d..9f2367904 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -52,7 +52,7 @@ class Dictionary {
                 maxWordLength, maxBigrams);
     }
 
-    bool isValidWord(const int32_t *word, int length);
+    int getFrequency(const int32_t *word, int length);
     bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
     void *getDict() { return (void *)mDict; }
     int getDictSize() { return mDictSize; }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 828582848..efe9c4fe3 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -747,8 +747,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
     return maxFreq;
 }
 
-bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
-    return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
+int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
+    const uint8_t* const root = DICT_ROOT;
+    int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
+    if (NOT_VALID_WORD == pos) {
+        return NOT_A_PROBABILITY;
+    }
+    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+    const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
+    if (hasMultipleChars) {
+        pos = BinaryFormat::skipOtherCharacters(root, pos);
+    } else {
+        BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
+    }
+    const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
+    return unigramFreq;
 }
 
 // TODO: remove this function.
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index b9233518f..b70894004 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -72,7 +72,7 @@ class UnigramDictionary {
 
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
-    bool isValidWord(const int32_t* const inWord, const int length) const;
+    int getFrequency(const int32_t* const inWord, const int length) const;
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
             Correction *correction, const int *xcoordinates, const int *ycoordinates,