aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java49
-rw-r--r--java/src/com/android/inputmethod/latin/LatinIME.java1
-rw-r--r--java/src/com/android/inputmethod/latin/UserHistoryDictionary.java15
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java49
4 files changed, 93 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
index 063243e1b..dd11aaa37 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+
import android.content.Context;
import android.content.SharedPreferences;
import android.content.pm.PackageManager.NameNotFoundException;
@@ -23,6 +25,7 @@ import android.content.res.AssetFileDescriptor;
import android.util.Log;
import java.io.File;
+import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
@@ -51,6 +54,9 @@ class BinaryDictionaryGetter {
private static final String MAIN_DICTIONARY_CATEGORY = "main";
public static final String ID_CATEGORY_SEPARATOR = ":";
+ // The key considered to read the version attribute in a dictionary file.
+ private static String VERSION_KEY = "version";
+
// Prevents this from being instantiated
private BinaryDictionaryGetter() {}
@@ -336,6 +342,42 @@ class BinaryDictionaryGetter {
return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
}
+ // ## HACK ## we prevent usage of a dictionary before version 18 for English only. The reason
+ // for this is, since those do not include whitelist entries, the new code with an old version
+ // of the dictionary would lose whitelist functionality.
+ private static boolean hackCanUseDictionaryFile(final Locale locale, final File f) {
+ // Only for English - other languages didn't have a whitelist, hence this
+ // ad-hock ## HACK ##
+ if (!Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) return true;
+
+ try {
+ // Read the version of the file
+ final RandomAccessFile raf = new RandomAccessFile(f, "r");
+ final int magic = raf.readInt();
+ if (magic != BinaryDictInputOutput.VERSION_2_MAGIC_NUMBER) {
+ return false;
+ }
+ final int formatVersion = raf.readInt();
+ final int headerSize = raf.readInt();
+ final HashMap<String, String> options = new HashMap<String, String>();
+ BinaryDictInputOutput.populateOptionsFromFile(raf, headerSize, options);
+ final String version = options.get(VERSION_KEY);
+ if (null == version) {
+ // No version in the options : the format is unexpected
+ return false;
+ }
+ // Version 18 is the first one to include the whitelist
+ // Obviously this is a big ## HACK ##
+ return Integer.parseInt(version) >= 18;
+ } catch (java.io.FileNotFoundException e) {
+ return false;
+ } catch (java.io.IOException e) {
+ return false;
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ }
+
/**
* Returns a list of file addresses for a given locale, trying relevant methods in order.
*
@@ -366,14 +408,15 @@ class BinaryDictionaryGetter {
// cachedWordLists may not be null, see doc for getCachedDictionaryList
for (final File f : cachedWordLists) {
final String wordListId = getWordListIdFromFileName(f.getName());
- if (isMainWordListId(wordListId)) {
+ final boolean canUse = f.canRead() && hackCanUseDictionaryFile(locale, f);
+ if (canUse && isMainWordListId(wordListId)) {
foundMainDict = true;
}
if (!dictPackSettings.isWordListActive(wordListId)) continue;
- if (f.canRead()) {
+ if (canUse) {
fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
} else {
- Log.e(TAG, "Found a cached dictionary file but cannot read it");
+ Log.e(TAG, "Found a cached dictionary file but cannot read or use it");
}
}
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 884e6db29..c20f3a3a9 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -2001,6 +2001,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
private CharSequence addToUserHistoryDictionary(final CharSequence suggestion) {
if (TextUtils.isEmpty(suggestion)) return null;
+ if (mSuggest == null) return null;
// If correction is not enabled, we don't add words to the user history dictionary.
// That's to avoid unintended additions in some sensitive fields, or fields that
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
index 3bb670c9a..d516e72ad 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java
@@ -52,14 +52,14 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private static final int FREQUENCY_FOR_TYPED = 2;
/** Maximum number of pairs. Pruning will start when databases goes above this number. */
- private static int sMaxHistoryBigrams = 10000;
+ public static final int sMaxHistoryBigrams = 10000;
/**
* When it hits maximum bigram pair, it will delete until you are left with
* only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs.
* Do not keep this number small to avoid deleting too often.
*/
- private static int sDeleteHistoryBigrams = 1000;
+ public static final int sDeleteHistoryBigrams = 1000;
/**
* Database version should increase if the database structure changes
@@ -109,12 +109,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private static DatabaseHelper sOpenHelper = null;
- public void setDatabaseMax(int maxHistoryBigram) {
- sMaxHistoryBigrams = maxHistoryBigram;
- }
-
- public void setDatabaseDelete(int deleteHistoryBigram) {
- sDeleteHistoryBigrams = deleteHistoryBigram;
+ public String getLocale() {
+ return mLocale;
}
public synchronized static UserHistoryDictionary getInstance(
@@ -502,9 +498,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
needsToSave(fc, isValid, addLevel0Bigram)) {
freq = fc;
} else {
+ // Delete this entry
freq = -1;
}
} else {
+ // Delete this entry
freq = -1;
}
}
@@ -541,6 +539,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
getContentValues(word1, word2, mLocale));
pairId = pairIdLong.intValue();
}
+ // Eliminate freq == 0 because that word is profanity.
if (freq > 0) {
if (PROFILE_SAVE_RESTORE) {
++profInsert;
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 2c3eee74c..b23b7db34 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -124,7 +124,7 @@ public class BinaryDictInputOutput {
*/
private static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
- private static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+ public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
private static final int MINIMUM_SUPPORTED_VERSION = 1;
private static final int MAXIMUM_SUPPORTED_VERSION = 2;
private static final int NOT_A_VERSION_NUMBER = -1;
@@ -783,10 +783,10 @@ public class BinaryDictInputOutput {
// their lower bound and exclude their higher bound so we need to have the first step
// start at exactly 1 unit higher than floor(unigramFreq + half a step).
// Note : to reconstruct the score, the dictionary reader will need to divide
- // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise, and add
- // (discretizedFrequency + 0.5) times this value to get the median value of the step,
- // which is the best approximation. This is how we get the most precise result with
- // only four bits.
+ // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise to get the value of the step,
+ // and add (discretizedFrequency + 0.5 + 0.5) times this value to get the best
+ // approximation. (0.5 to get the first step start, and 0.5 to get the middle of the
+ // step pointed by the discretized frequency.
final float stepSize =
(MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5f + MAX_BIGRAM_FREQUENCY);
final float firstStepStart = 1 + unigramFrequency + (stepSize / 2.0f);
@@ -1328,6 +1328,21 @@ public class BinaryDictInputOutput {
}
/**
+ * Reads options from a file and populate a map with their contents.
+ *
+ * The file is read at the current file pointer, so the caller must take care the pointer
+ * is in the right place before calling this.
+ */
+ public static void populateOptionsFromFile(final RandomAccessFile source, final long headerSize,
+ final HashMap<String, String> options) throws IOException {
+ while (source.getFilePointer() < headerSize) {
+ final String key = CharEncoding.readString(source);
+ final String value = CharEncoding.readString(source);
+ options.put(key, value);
+ }
+ }
+
+ /**
* Reads a random access file and returns the memory representation of the dictionary.
*
* This high-level method takes a binary file and reads its contents, populating a
@@ -1358,11 +1373,7 @@ public class BinaryDictInputOutput {
} else {
headerSize = (source.readUnsignedByte() << 24) + (source.readUnsignedByte() << 16)
+ (source.readUnsignedByte() << 8) + source.readUnsignedByte();
- while (source.getFilePointer() < headerSize) {
- final String key = CharEncoding.readString(source);
- final String value = CharEncoding.readString(source);
- options.put(key, value);
- }
+ populateOptionsFromFile(source, headerSize, options);
source.seek(headerSize);
}
@@ -1410,4 +1421,22 @@ public class BinaryDictInputOutput {
return false;
}
}
+
+ /**
+ * Calculate bigram frequency from compressed value
+ *
+ * @see #makeBigramFlags
+ *
+ * @param unigramFrequency
+ * @param bigramFrequency compressed frequency
+ * @return approximate bigram frequency
+ */
+ public static int reconstructBigramFrequency(final int unigramFrequency,
+ final int bigramFrequency) {
+ final float stepSize = (MAX_TERMINAL_FREQUENCY - unigramFrequency)
+ / (1.5f + MAX_BIGRAM_FREQUENCY);
+ final float resultFreqFloat = (float)unigramFrequency
+ + stepSize * (bigramFrequency + 1.0f);
+ return (int)resultFreqFloat;
+ }
}