12 files changed, 127 insertions, 120 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index ab2a12fd0..d369e2b47 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -91,7 +91,7 @@ public final class BinaryDictionary extends Dictionary {
 
     private static native long openNative(String sourceDir, long dictOffset, long dictSize);
     private static native void closeNative(long dict);
-    private static native int getFrequencyNative(long dict, int[] word);
+    private static native int getProbabilityNative(long dict, int[] word);
     private static native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
     private static native int getSuggestionsNative(long dict, long proximityInfo,
             long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
@@ -186,7 +186,7 @@ public final class BinaryDictionary extends Dictionary {
     public int getFrequency(final String word) {
         if (word == null) return -1;
         int[] codePoints = StringUtils.toCodePointArray(word);
-        return getFrequencyNative(mNativeDict, codePoints);
+        return getProbabilityNative(mNativeDict, codePoints);
     }
 
     // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index ca38b0de5..9321c4b8c 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -203,14 +203,14 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
     return count;
 }
 
-static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jclass clazz, jlong dict,
+static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
         jintArray wordArray) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return 0;
     const jsize codePointLength = env->GetArrayLength(wordArray);
     int codePoints[codePointLength];
     env->GetIntArrayRegion(wordArray, 0, codePointLength, codePoints);
-    return dictionary->getFrequency(codePoints, codePointLength);
+    return dictionary->getProbability(codePoints, codePointLength);
 }
 
 static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
@@ -285,8 +285,8 @@ static JNINativeMethod sMethods[] = {
     {"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
     {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
             reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
-    {"getFrequencyNative", "(J[I)I",
-            reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
+    {"getProbabilityNative", "(J[I)I",
+            reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)},
     {"isValidBigramNative", "(J[I[I)Z",
             reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
     {"calcNormalizedScoreNative", "([I[II)F",
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index ef0434c49..43e59a262 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -36,21 +36,21 @@ BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT
 BigramDictionary::~BigramDictionary() {
 }
 
-void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
+void BigramDictionary::addWordBigram(int *word, int length, int probability, int *bigramProbability,
         int *bigramCodePoints, int *outputTypes) const {
     word[length] = 0;
     if (DEBUG_DICT) {
 #ifdef FLAG_DBG
         char s[length + 1];
         for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
-        AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
+        AKLOGI("Bigram: Found word = %s, freq = %d :", s, probability);
 #endif
     }
 
     // Find the right insertion point
     int insertAt = 0;
     while (insertAt < MAX_RESULTS) {
-        if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
+        if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
                 && length < getCodePointCount(MAX_WORD_LENGTH,
                         bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
             break;
@@ -63,10 +63,10 @@ void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *
     if (insertAt >= MAX_RESULTS) {
         return;
     }
-    memmove(bigramFreq + (insertAt + 1),
-            bigramFreq + insertAt,
-            (MAX_RESULTS - insertAt - 1) * sizeof(bigramFreq[0]));
-    bigramFreq[insertAt] = frequency;
+    memmove(bigramProbability + (insertAt + 1),
+            bigramProbability + insertAt,
+            (MAX_RESULTS - insertAt - 1) * sizeof(bigramProbability[0]));
+    bigramProbability[insertAt] = probability;
     outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
     memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
             bigramCodePoints + insertAt * MAX_WORD_LENGTH,
@@ -87,7 +87,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *
  * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
  * inputSize: the size of the codes array.
  * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
- * bigramFreq: an array to output frequencies.
+ * bigramProbability: an array to output probabilities.
  * outputTypes: an array to output types.
  * This method returns the number of bigrams this word has, for backward compatibility.
  * Note: this is not the number of bigrams output in the array, which is the number of
@@ -98,7 +98,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *
  * reduce their scope to the ones that match the first letter.
  */
 int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints,
-        int inputSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
+        int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
     // TODO: remove unused arguments, and refrain from storing stuff in members of this class
     // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
 
@@ -118,23 +118,24 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
     do {
         bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
         int bigramBuffer[MAX_WORD_LENGTH];
-        int unigramFreq = 0;
+        int unigramProbability = 0;
         const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
                 &pos);
         const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
-                bigramBuffer, &unigramFreq);
+                bigramBuffer, &unigramProbability);
 
         // inputSize == 0 means we are trying to find bigram predictions.
         if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
-            const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
-            // Due to space constraints, the frequency for bigrams is approximate - the lower the
-            // unigram frequency, the worse the precision. The theoritical maximum error in
-            // resulting frequency is 8 - although in the practice it's never bigger than 3 or 4
+            const int bigramProbabilityTemp =
+                    BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
+            // Due to space constraints, the probability for bigrams is approximate - the lower the
+            // unigram probability, the worse the precision. The theoretical maximum error in
+            // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
             // in very bad cases. This means that sometimes, we'll see some bigrams interverted
             // here, but it can't get too bad.
-            const int frequency =
-                    BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
-            addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
+            const int probability = BinaryFormat::computeProbabilityForBigram(
+                    unigramProbability, bigramProbabilityTemp);
+            addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
                     outputTypes);
             ++bigramCount;
         }
@@ -159,13 +160,13 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
     } else {
         pos = BinaryFormat::skipOtherCharacters(root, pos);
     }
-    pos = BinaryFormat::skipFrequency(flags, pos);
+    pos = BinaryFormat::skipProbability(flags, pos);
     pos = BinaryFormat::skipChildrenPosition(flags, pos);
     pos = BinaryFormat::skipShortcuts(root, flags, pos);
     return pos;
 }
 
-void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevWord,
+void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord,
         const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
     memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
     const uint8_t *const root = DICT_ROOT;
@@ -181,10 +182,10 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevW
     uint8_t bigramFlags;
     do {
         bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
-        const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+        const int probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
         const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
                 &pos);
-        (*map)[bigramPos] = frequency;
+        (*map)[bigramPos] = probability;
         setInFilter(filter, bigramPos);
     } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
 }
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 2ce6c1d0d..b86e564c3 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -29,14 +29,14 @@ class BigramDictionary {
     BigramDictionary(const uint8_t *const streamStart);
     int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
             int *frequencies, int *outputTypes) const;
-    void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
+    void fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength,
             std::map<int, int> *map, uint8_t *filter) const;
     bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
     ~BigramDictionary();
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
-    void addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
-            int *outputTypes) const;
+    void addWordBigram(int *word, int length, int probability, int *bigramProbability,
+            int *bigramCodePoints, int *outputTypes) const;
     bool checkFirstCharacter(int *word, int *inputCodePoints) const;
     int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
             const bool forceLowerCaseSearch) const;
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 2d7c4b492..1c4061fd8 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -52,10 +52,10 @@ class BinaryFormat {
     // Flag for sign of offset. If this flag is set, the offset value must be negated.
     static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
 
-    // Mask for attribute frequency, stored on 4 bits inside the flags byte.
-    static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
-    // The numeric value of the shortcut frequency that means 'whitelist'.
-    static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
+    // Mask for attribute probability, stored on 4 bits inside the flags byte.
+    static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+    // The numeric value of the shortcut probability that means 'whitelist'.
+    static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
 
     // Mask and flags for attribute address type selection.
     static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
@@ -72,10 +72,10 @@ class BinaryFormat {
     static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
     static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
     static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
-    static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
+    static int readProbabilityWithoutMovingPointer(const uint8_t *const dict, const int pos);
     static int skipOtherCharacters(const uint8_t *const dict, const int pos);
     static int skipChildrenPosition(const uint8_t flags, const int pos);
-    static int skipFrequency(const uint8_t flags, const int pos);
+    static int skipProbability(const uint8_t flags, const int pos);
     static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
     static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
             const int pos);
@@ -83,14 +83,15 @@ class BinaryFormat {
     static bool hasChildrenInFlags(const uint8_t flags);
     static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
             int *pos);
-    static int getAttributeFrequencyFromFlags(const int flags);
+    static int getAttributeProbabilityFromFlags(const int flags);
     static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
             const int length, const bool forceLowerCaseSearch);
     static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
-            int *outWord, int *outUnigramFrequency);
-    static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
+            int *outWord, int *outUnigramProbability);
+    static int computeProbabilityForBigram(
+            const int unigramProbability, const int bigramProbability);
     static int getProbability(const int position, const std::map<int, int> *bigramMap,
-            const uint8_t *bigramFilter, const int unigramFreq);
+            const uint8_t *bigramFilter, const int unigramProbability);
 
     // Flags for special processing
     // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -264,7 +265,7 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *c
     }
 }
 
-inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict,
+inline int BinaryFormat::readProbabilityWithoutMovingPointer(const uint8_t *const dict,
         const int pos) {
     return dict[pos];
 }
@@ -320,7 +321,7 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos
     return pos + childrenAddressSize(flags);
 }
 
-inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
+inline int BinaryFormat::skipProbability(const uint8_t flags, const int pos) {
     return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
 }
 
@@ -415,8 +416,8 @@ AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uin
     }
 }
 
-inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
-    return flags & MASK_ATTRIBUTE_FREQUENCY;
+inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
+    return flags & MASK_ATTRIBUTE_PROBABILITY;
 }
 
 // This function gets the byte position of the last chargroup of the exact matching word in the
@@ -466,7 +467,7 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
                     if (wordPos == length) {
                         return charGroupPos;
                     }
-                    pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
+                    pos = BinaryFormat::skipProbability(FLAG_IS_TERMINAL, pos);
                 }
                 if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
                     return NOT_VALID_WORD;
@@ -481,7 +482,7 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
                 if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                     pos = BinaryFormat::skipOtherCharacters(root, pos);
                 }
-                pos = BinaryFormat::skipFrequency(flags, pos);
+                pos = BinaryFormat::skipProbability(flags, pos);
                 pos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
             }
             --charGroupCount;
@@ -504,11 +505,11 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
  * address: the byte position of the last chargroup of the word we are searching for (this is
  *   what is stored as the "bigram address" in each bigram)
  * outword: an array to write the found word, with MAX_WORD_LENGTH size.
- * outUnigramFrequency: a pointer to an int to write the frequency into.
+ * outUnigramProbability: a pointer to an int to write the probability into.
  * Return value : the length of the word, of 0 if the word was not found.
  */
 AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
-        const int maxDepth, int *outWord, int *outUnigramFrequency) {
+        const int maxDepth, int *outWord, int *outUnigramProbability) {
     int pos = 0;
     int wordPos = 0;
 
@@ -541,15 +542,15 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
                         nextChar = getCodePointAndForwardPointer(root, &pos);
                     }
                 }
-                *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
+                *outUnigramProbability = readProbabilityWithoutMovingPointer(root, pos);
                 return ++wordPos;
             }
             // We need to skip past this char group, so skip any remaining chars after the
-            // first and possibly the frequency.
+            // first and possibly the probability.
             if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                 pos = skipOtherCharacters(root, pos);
             }
-            pos = skipFrequency(flags, pos);
+            pos = skipProbability(flags, pos);
 
             // The fact that this group has children is very important. Since we already know
             // that this group does not match, if it has no children we know it is irrelevant
@@ -604,9 +605,9 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
                         }
                     }
                     ++wordPos;
-                    // Now we only need to branch to the children address. Skip the frequency if
+                    // Now we only need to branch to the children address. Skip the probability if
                     // it's there, read pos, and break to resume the search at pos.
-                    lastCandidateGroupPos = skipFrequency(lastFlags, lastCandidateGroupPos);
+                    lastCandidateGroupPos = skipProbability(lastFlags, lastCandidateGroupPos);
                     pos = readChildrenPosition(root, lastFlags, lastCandidateGroupPos);
                     break;
                 } else {
@@ -635,36 +636,39 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
     return 0;
 }
 
-static inline int backoff(const int unigramFreq) {
-    return unigramFreq;
+static inline int backoff(const int unigramProbability) {
+    return unigramProbability;
     // For some reason, applying the backoff weight gives bad results in tests. To apply the
     // backoff weight, we divide the probability by 2, which in our storing format means
     // decreasing the score by 8.
     // TODO: figure out what's wrong with this.
-    // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
+    // return unigramProbability > 8 ? unigramProbability - 8 : (0 == unigramProbability ? 0 : 8);
 }
 
-inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) {
-    // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
-    // unigram frequency to be the median value of the 17th step from the top. A value of
-    // 0 for the bigram frequency represents the middle of the 16th step from the top,
+inline int BinaryFormat::computeProbabilityForBigram(
+        const int unigramProbability, const int bigramProbability) {
+    // We divide the range [unigramProbability..255] in 16.5 steps - in other words, we want the
+    // unigram probability to be the median value of the 17th step from the top. A value of
+    // 0 for the bigram probability represents the middle of the 16th step from the top,
     // while a value of 15 represents the middle of the top step.
     // See makedict.BinaryDictInputOutput for details.
-    const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
-    return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
+    const float stepSize = static_cast<float>(MAX_PROBABILITY - unigramProbability)
+            / (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY);
+    return unigramProbability
+            + static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
 }
 
 // This returns a probability in log space.
 inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
-        const uint8_t *bigramFilter, const int unigramFreq) {
-    if (!bigramMap || !bigramFilter) return backoff(unigramFreq);
-    if (!isInFilter(bigramFilter, position)) return backoff(unigramFreq);
-    const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
-    if (bigramFreqIt != bigramMap->end()) {
-        const int bigramFreq = bigramFreqIt->second;
-        return computeFrequencyForBigram(unigramFreq, bigramFreq);
+        const uint8_t *bigramFilter, const int unigramProbability) {
+    if (!bigramMap || !bigramFilter) return backoff(unigramProbability);
+    if (!isInFilter(bigramFilter, position)) return backoff(unigramProbability);
+    const std::map<int, int>::const_iterator bigramProbabilityIt = bigramMap->find(position);
+    if (bigramProbabilityIt != bigramMap->end()) {
+        const int bigramProbability = bigramProbabilityIt->second;
+        return computeProbabilityForBigram(unigramProbability, bigramProbability);
     }
-    return backoff(unigramFreq);
+    return backoff(unigramProbability);
 }
 } // namespace latinime
 #endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 0ae02d506..671507ee0 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -841,7 +841,7 @@ inline static bool isUpperCase(unsigned short c) {
             const int freq = freqArray[i];
             // Demote too short weak words
             if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) {
-                multiplyRate(100 * freq / MAX_FREQ, &totalFreq);
+                multiplyRate(100 * freq / MAX_PROBABILITY, &totalFreq);
             }
             if (wordLength == 1) {
                 ++oneLengthCounter;
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 0aedc287f..6e098157d 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -72,11 +72,11 @@ AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sour
 }
 
 static inline void dumpWordInfo(const int *word, const int length, const int rank,
-        const int frequency) {
+        const int probability) {
     static char charBuf[50];
     const int N = intArrayToCharArray(word, length, charBuf);
     if (N > 1) {
-        AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency);
+        AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
     }
 }
 
@@ -312,8 +312,8 @@ static inline void prof_out(void) {
 #define ZERO_DISTANCE_PROMOTION_RATE 110.0f
 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
 #define HALF_SCORE_SQUARED_RADIUS 32.0f
-#define MAX_FREQ 255
-#define MAX_BIGRAM_FREQ 15
+#define MAX_PROBABILITY 255
+#define MAX_BIGRAM_ENCODED_PROBABILITY 15
 
 // Assuming locale strings such as en_US, sr-Latn etc.
 #define MAX_LOCALE_STRING_LENGTH 10
@@ -335,8 +335,8 @@ static inline void prof_out(void) {
 
 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f
 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f
-/* heuristic... This should be changed if we change the unit of the frequency. */
-#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100)
+/* heuristic... This should be changed if we change the unit of the probability. */
+#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_PROBABILITY * 58 / 100)
 
 #define MAX_DEPTH_MULTIPLIER 3
 #define FIRST_WORD_INDEX 0
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 2be1f4f39..6deab36b6 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -62,7 +62,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
     } else {
         std::map<int, int> bigramMap;
         uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
-        mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordCodePoints,
+        mBigramDictionary->fillBigramAddressToProbabilityMapAndFilter(prevWordCodePoints,
                 prevWordLength, &bigramMap, bigramFilter);
         result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates,
                 inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, outWords,
@@ -78,8 +78,8 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
             frequencies, outputTypes);
 }
 
-int Dictionary::getFrequency(const int *word, int length) const {
-    return mUnigramDictionary->getFrequency(word, length);
+int Dictionary::getProbability(const int *word, int length) const {
+    return mUnigramDictionary->getProbability(word, length);
 }
 
 bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const {
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index ecdddd771..449b95ab6 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -52,7 +52,7 @@ class Dictionary {
     int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
             int *frequencies, int *outputTypes) const;
 
-    int getFrequency(const int *word, int length) const;
+    int getProbability(const int *word, int length) const;
     bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
     const uint8_t *getDict() const { // required to release dictionary buffer
         return mDict;
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index a8cc03b8d..144ae1452 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -51,7 +51,7 @@ class TerminalAttributes {
                 if (NOT_A_CODE_POINT == codePoint) break;
                 outWord[i] = codePoint;
             }
-            *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
+            *outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
             return i;
         }
 
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 0b18e78a3..80ba412a3 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -52,8 +52,8 @@ UnigramDictionary::~UnigramDictionary() {
 }
 
 // TODO: This needs to take a const int* and not tinker with its contents
-static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
-    queue->push(frequency, word, length, type);
+static void addWord(int *word, int length, int probability, WordsPriorityQueue *queue, int type) {
+    queue->push(probability, word, length, type);
 }
 
 // Return the replacement code point for a digraph, or 0 if none.
@@ -158,7 +158,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
             queuePool);
 }
 
-// bigramMap contains the association <bigram address> -> <bigram frequency>
+// bigramMap contains the association <bigram address> -> <bigram probability>
 // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
 // in bigram_dictionary.cpp
 int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
@@ -399,7 +399,7 @@ void UnigramDictionary::onTerminal(const int probability,
                     MAX_WORD_LENGTH, shortcutTarget, &shortcutFrequency);
             int shortcutScore;
             int kind;
-            if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
+            if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
                     && correction->sameAsTyped()) {
                 shortcutScore = S_INT_MAX;
                 kind = Dictionary::KIND_WHITELIST;
@@ -483,7 +483,7 @@ int UnigramDictionary::getSubStringSuggestion(
             inputSize, correction);
 
     int word[MAX_WORD_LENGTH];
-    int freq = getMostFrequentWordLike(
+    int freq = getMostProbableWordLike(
             inputWordStartPos, inputWordLength, correction, word);
     if (freq > 0) {
         nextWordLength = inputWordLength;
@@ -679,15 +679,15 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
             outputWord);
 }
 
-// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
+// Wrapper for getMostProbableWordLikeInner, which matches it to the previous
 // interface.
-int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, const int inputSize,
+int UnigramDictionary::getMostProbableWordLike(const int startInputIndex, const int inputSize,
         Correction *correction, int *word) const {
     int inWord[inputSize];
     for (int i = 0; i < inputSize; ++i) {
         inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
     }
-    return getMostFrequentWordLikeInner(inWord, inputSize, word);
+    return getMostProbableWordLikeInner(inWord, inputSize, word);
 }
 
 // This function will take the position of a character array within a CharGroup,
@@ -738,9 +738,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
 }
 
 // This function is invoked when a word like the word searched for is found.
-// It will compare the frequency to the max frequency, and if greater, will
+// It will compare the probability to the max probability, and if greater, will
 // copy the word into the output buffer. In output value maxFreq, it will
-// write the new maximum frequency if it changed.
+// write the new maximum probability if it changed.
 static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
         int *maxFreq) {
     if (freq > *maxFreq) {
@@ -752,9 +752,9 @@ static inline void onTerminalWordLike(const int freq, int *newWord, const int le
     }
 }
 
-// Will find the highest frequency of the words like the one passed as an argument,
+// Will find the highest probability of the words like the one passed as an argument,
 // that is, everything that only differs by case/accents.
-int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
         int *outWord) const {
     int newWord[MAX_WORD_LENGTH];
     int depth = 0;
@@ -775,17 +775,18 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, con
             int inputIndex = stackInputIndex[depth];
             const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
             // Test whether all chars in this group match with the word we are searching for. If so,
-            // we want to traverse its children (or if the inputSize match, evaluate its frequency).
-            // Note that this function will output the position regardless, but will only write
-            // into inputIndex if there is a match.
+            // we want to traverse its children (or if the inputSize match, evaluate its
+            // probability). Note that this function will output the position regardless, but will
+            // only write into inputIndex if there is a match.
             const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
                     inputIndex, inputSize, newWord, &inputIndex, &pos);
             if (isAlike && (!(BinaryFormat::FLAG_IS_NOT_A_WORD & flags))
                     && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
-                const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
-                onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
+                const int probability =
+                        BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+                onTerminalWordLike(probability, newWord, inputIndex, outWord, &maxFreq);
             }
-            pos = BinaryFormat::skipFrequency(flags, pos);
+            pos = BinaryFormat::skipProbability(flags, pos);
             const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
             const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
             // If we had a match and the word has children, we want to traverse them. We don't have
@@ -816,7 +817,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, con
     return maxFreq;
 }
 
-int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
+int UnigramDictionary::getProbability(const int *const inWord, const int length) const {
     const uint8_t *const root = DICT_ROOT;
     int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
             false /* forceLowerCaseSearch */);
@@ -826,7 +827,7 @@ int UnigramDictionary::getFrequency(const int *const inWord, const int length) c
     const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
     if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
         // If this is not a word, or if it's a blacklisted entry, it should behave as
-        // having no frequency outside of the suggestion process (where it should be used
+        // having no probability outside of the suggestion process (where it should be used
         // for shortcuts).
         return NOT_A_PROBABILITY;
     }
@@ -836,8 +837,8 @@ int UnigramDictionary::getFrequency(const int *const inWord, const int length) c
     } else {
         BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
     }
-    const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
-    return unigramFreq;
+    const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+    return unigramProbability;
 }
 
 // TODO: remove this function.
@@ -884,7 +885,7 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
 
     // This gets only ONE character from the stream. Next there will be:
     // if FLAG_HAS_MULTIPLE CHARS: the other characters of the same node
-    // else if FLAG_IS_TERMINAL: the frequency
+    // else if FLAG_IS_TERMINAL: the probability
     // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
     // Note that you can't have a node that both is not a terminal and has no children.
     int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
@@ -917,14 +918,14 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
             // We found that this is an unrelated character, so we should give up traversing
             // this node and its children entirely.
             // However we may not be on the last virtual node yet so we skip the remaining
-            // characters in this node, the frequency if it's there, read the next sibling
+            // characters in this node, the probability if it's there, read the next sibling
             // position to output it, then return false.
             // We don't have to output other values because we return false, as in
             // "don't traverse children".
             if (!isLastChar) {
                 pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos);
             }
-            pos = BinaryFormat::skipFrequency(flags, pos);
+            pos = BinaryFormat::skipProbability(flags, pos);
             *nextSiblingPosition =
                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
             return false;
@@ -937,16 +938,17 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
     } while (NOT_A_CODE_POINT != c);
 
     if (isTerminalNode) {
-        // The frequency should be here, because we come here only if this is actually
+        // The probability should be here, because we come here only if this is actually
         // a terminal node, and we are on its last char.
-        const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+        const int unigramProbability =
+                BinaryFormat::readProbabilityWithoutMovingPointer(DICT_ROOT, pos);
+        const int childrenAddressPos = BinaryFormat::skipProbability(flags, pos);
         const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
         TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
         // bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
         // bigramFilter is a bloom filter of said frequencies for even faster rejection.
         const int probability = BinaryFormat::getProbability(initialPos, bigramMap, bigramFilter,
-                unigramFreq);
+                unigramProbability);
         onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
                 currentWordIndex);
 
@@ -961,7 +963,7 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
         // Note that !hasChildren implies isLastChar, so we know we don't have to skip any
         // remaining char in this group for there can't be any.
         if (!hasChildren) {
-            pos = BinaryFormat::skipFrequency(flags, pos);
+            pos = BinaryFormat::skipProbability(flags, pos);
             *nextSiblingPosition =
                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
             return false;
@@ -969,7 +971,7 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
 
         // Optimization: Prune out words that are too long compared to how much was typed.
         if (correction->needsToPrune()) {
-            pos = BinaryFormat::skipFrequency(flags, pos);
+            pos = BinaryFormat::skipProbability(flags, pos);
             *nextSiblingPosition =
                     BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
             if (DEBUG_DICT_FULL) {
@@ -983,13 +985,13 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
     // children, we can't come here.
     ASSERT(BinaryFormat::hasChildrenInFlags(flags));
 
-    // If this node was a terminal it still has the frequency under the pointer (it may have been
-    // read, but not skipped - see readFrequencyWithoutMovingPointer).
+    // If this node was a terminal it still has the probability under the pointer (it may have been
+    // read, but not skipped - see readProbabilityWithoutMovingPointer).
     // Next come the children position, then possibly attributes (attributes are bigrams only for
     // now, maybe something related to shortcuts in the future).
     // Once this is read, we still need to output the number of nodes in the immediate children of
     // this node, so we read and output it before returning true, as in "please traverse children".
-    pos = BinaryFormat::skipFrequency(flags, pos);
+    pos = BinaryFormat::skipProbability(flags, pos);
     int childrenPos = BinaryFormat::readChildrenPosition(DICT_ROOT, flags, pos);
     *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
     *newCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &childrenPos);
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 502bf4790..c1955e8bb 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -40,7 +40,7 @@ class UnigramDictionary {
     static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
     static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
     UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags);
-    int getFrequency(const int *const inWord, const int length) const;
+    int getProbability(const int *const inWord, const int length) const;
     int getBigramPosition(int pos, int *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *inputCodePoints, const int inputSize,
@@ -89,9 +89,9 @@ class UnigramDictionary {
             const uint8_t *bigramFilter, Correction *correction, int *newCount,
             int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
             const int currentWordIndex) const;
-    int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
+    int getMostProbableWordLike(const int startInputIndex, const int inputSize,
             Correction *correction, int *word) const;
-    int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+    int getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
             int *outWord) const;
     int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const bool useFullEditDistance,