4 files changed, 93 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
index 063243e1b..dd11aaa37 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
@@ -16,6 +16,8 @@
 
 package com.android.inputmethod.latin;
 
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+
 import android.content.Context;
 import android.content.SharedPreferences;
 import android.content.pm.PackageManager.NameNotFoundException;
@@ -23,6 +25,7 @@ import android.content.res.AssetFileDescriptor;
 import android.util.Log;
 
 import java.io.File;
+import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Locale;
@@ -51,6 +54,9 @@ class BinaryDictionaryGetter {
     private static final String MAIN_DICTIONARY_CATEGORY = "main";
     public static final String ID_CATEGORY_SEPARATOR = ":";
 
+    // The key used to look up the version attribute among the options of a dictionary file.
+    private static final String VERSION_KEY = "version";
+
     // Prevents this from being instantiated
     private BinaryDictionaryGetter() {}
 
@@ -336,6 +342,42 @@ class BinaryDictionaryGetter {
         return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
     }
 
+    // ## HACK ## we prevent usage of a dictionary before version 18 for English only. The reason
+    // for this is, since those do not include whitelist entries, the new code with an old version
+    // of the dictionary would lose whitelist functionality. Unreadable files are refused.
+    private static boolean hackCanUseDictionaryFile(final Locale locale, final File f) {
+        // Only for English: other languages never had a whitelist, hence this ad-hoc ## HACK ##
+        if (!Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) return true;
+
+        RandomAccessFile raf = null;
+        try {
+            // Read the version of the file
+            raf = new RandomAccessFile(f, "r");
+            if (raf.readInt() != BinaryDictInputOutput.VERSION_2_MAGIC_NUMBER) return false;
+            raf.readInt(); // Skip the 4 bytes between the magic number and the header size
+            final int headerSize = raf.readInt();
+            final HashMap<String, String> options = new HashMap<String, String>();
+            BinaryDictInputOutput.populateOptionsFromFile(raf, headerSize, options);
+            final String version = options.get(VERSION_KEY);
+            if (null == version) return false; // No version option : the format is unexpected
+            // Version 18 is the first one to include the whitelist
+            // Obviously this is a big ## HACK ##
+            return Integer.parseInt(version) >= 18;
+        } catch (java.io.IOException e) { // Includes FileNotFoundException
+            return false;
+        } catch (NumberFormatException e) {
+            return false;
+        } finally {
+            if (null != raf) {
+                try {
+                    raf.close();
+                } catch (java.io.IOException e) {
+                    // Nothing to recover : the answer is already decided
+                }
+            }
+        }
+    }
+
     /**
      * Returns a list of file addresses for a given locale, trying relevant methods in order.
      *
@@ -366,14 +408,15 @@ class BinaryDictionaryGetter {
         // cachedWordLists may not be null, see doc for getCachedDictionaryList
         for (final File f : cachedWordLists) {
             final String wordListId = getWordListIdFromFileName(f.getName());
-            if (isMainWordListId(wordListId)) {
+            final boolean canUse = f.canRead() && hackCanUseDictionaryFile(locale, f);
+            if (canUse && isMainWordListId(wordListId)) {
                 foundMainDict = true;
             }
             if (!dictPackSettings.isWordListActive(wordListId)) continue;
-            if (f.canRead()) {
+            if (canUse) {
                 fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
             } else {
-                Log.e(TAG, "Found a cached dictionary file but cannot read it");
+                Log.e(TAG, "Found a cached dictionary file but cannot read or use it");
             }
         }
 
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 884e6db29..c20f3a3a9 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -2001,6 +2001,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
 
     private CharSequence addToUserHistoryDictionary(final CharSequence suggestion) {
         if (TextUtils.isEmpty(suggestion)) return null;
+        if (mSuggest == null) return null;
 
         // If correction is not enabled, we don't add words to the user history dictionary.
         // That's to avoid unintended additions in some sensitive fields, or fields that
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
index 3bb670c9a..d516e72ad 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
@@ -52,14 +52,14 @@ public class UserHistoryDictionary extends ExpandableDictionary {
     private static final int FREQUENCY_FOR_TYPED = 2;
 
     /** Maximum number of pairs. Pruning will start when databases goes above this number. */
-    private static int sMaxHistoryBigrams = 10000;
+    public static final int sMaxHistoryBigrams = 10000;
 
     /**
      * When it hits maximum bigram pair, it will delete until you are left with
      * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs.
      * Do not keep this number small to avoid deleting too often.
      */
-    private static int sDeleteHistoryBigrams = 1000;
+    public static final int sDeleteHistoryBigrams = 1000;
 
     /**
      * Database version should increase if the database structure changes
@@ -109,12 +109,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
 
     private static DatabaseHelper sOpenHelper = null;
 
-    public void setDatabaseMax(int maxHistoryBigram) {
-        sMaxHistoryBigrams = maxHistoryBigram;
-    }
-
-    public void setDatabaseDelete(int deleteHistoryBigram) {
-        sDeleteHistoryBigrams = deleteHistoryBigram;
+    public String getLocale() {
+        return mLocale;
     }
 
     public synchronized static UserHistoryDictionary getInstance(
@@ -502,9 +498,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                                     needsToSave(fc, isValid, addLevel0Bigram)) {
                                 freq = fc;
                             } else {
+                                // Delete this entry
                                 freq = -1;
                             }
                         } else {
+                            // Delete this entry
                             freq = -1;
                         }
                     }
@@ -541,6 +539,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                                     getContentValues(word1, word2, mLocale));
                             pairId = pairIdLong.intValue();
                         }
+                        // Eliminate freq == 0 because that word is profanity.
                         if (freq > 0) {
                             if (PROFILE_SAVE_RESTORE) {
                                 ++profInsert;
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 2c3eee74c..b23b7db34 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -124,7 +124,7 @@ public class BinaryDictInputOutput {
      */
 
     private static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
-    private static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+    public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
     private static final int MINIMUM_SUPPORTED_VERSION = 1;
     private static final int MAXIMUM_SUPPORTED_VERSION = 2;
     private static final int NOT_A_VERSION_NUMBER = -1;
@@ -783,10 +783,10 @@ public class BinaryDictInputOutput {
         // their lower bound and exclude their higher bound so we need to have the first step
         // start at exactly 1 unit higher than floor(unigramFreq + half a step).
         // Note : to reconstruct the score, the dictionary reader will need to divide
-        // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise, and add
-        // (discretizedFrequency + 0.5) times this value to get the median value of the step,
-        // which is the best approximation. This is how we get the most precise result with
-        // only four bits.
+        // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise to get the value of the step,
+        // and add (discretizedFrequency + 0.5 + 0.5) times this value to get the best
+        // approximation. (0.5 to get the first step start, and 0.5 to get the middle of the
+        // step pointed by the discretized frequency.)
         final float stepSize =
                 (MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5f + MAX_BIGRAM_FREQUENCY);
         final float firstStepStart = 1 + unigramFrequency + (stepSize / 2.0f);
@@ -1328,6 +1328,21 @@ public class BinaryDictInputOutput {
     }
 
     /**
+     * Reads key/value option pairs from a file and stores them into the supplied map.
+     *
+     * Reading starts at the current file pointer and goes on while that pointer is below
+     * headerSize, so the caller must position the pointer correctly before calling this.
+     */
+    public static void populateOptionsFromFile(final RandomAccessFile source, final long headerSize,
+            final HashMap<String, String> options) throws IOException {
+        // Each option is read as two consecutive strings: the key first, then its value.
+        while (headerSize > source.getFilePointer()) {
+            final String optionKey = CharEncoding.readString(source);
+            options.put(optionKey, CharEncoding.readString(source));
+        }
+    }
+
+    /**
      * Reads a random access file and returns the memory representation of the dictionary.
      *
      * This high-level method takes a binary file and reads its contents, populating a
@@ -1358,11 +1373,7 @@ public class BinaryDictInputOutput {
         } else {
             headerSize = (source.readUnsignedByte() << 24) + (source.readUnsignedByte() << 16)
                     + (source.readUnsignedByte() << 8) + source.readUnsignedByte();
-            while (source.getFilePointer() < headerSize) {
-                final String key = CharEncoding.readString(source);
-                final String value = CharEncoding.readString(source);
-                options.put(key, value);
-            }
+            populateOptionsFromFile(source, headerSize, options);
             source.seek(headerSize);
         }
 
@@ -1410,4 +1421,22 @@ public class BinaryDictInputOutput {
             return false;
         }
     }
+
+    /**
+     * Rebuilds an approximate bigram frequency from its compressed form.
+     *
+     * @see #makeBigramFlags
+     *
+     * @param unigramFrequency unigram frequency the reconstructed value is offset from
+     * @param bigramFrequency compressed frequency
+     * @return approximate bigram frequency, truncated to an int
+     */
+    public static int reconstructBigramFrequency(final int unigramFrequency,
+            final int bigramFrequency) {
+        final float step = (MAX_TERMINAL_FREQUENCY - unigramFrequency)
+                / (1.5f + MAX_BIGRAM_FREQUENCY);
+        final float approximation = unigramFrequency + step * (bigramFrequency + 1.0f);
+        // The cast drops the fractional part
+        return (int)approximation;
+    }
 }