aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/res/values/donottranslate.xml4
-rw-r--r--java/res/values/strings.xml6
-rw-r--r--java/src/com/android/inputmethod/latin/AdditionalSubtype.java18
-rw-r--r--java/src/com/android/inputmethod/latin/Constants.java19
-rw-r--r--java/src/com/android/inputmethod/latin/SubtypeLocale.java57
-rw-r--r--native/jni/src/bigram_dictionary.cpp16
-rw-r--r--native/jni/src/bigram_dictionary.h6
-rw-r--r--native/jni/src/binary_format.h11
-rw-r--r--native/jni/src/defines.h18
-rw-r--r--native/jni/src/dictionary.h13
-rw-r--r--native/jni/src/unigram_dictionary.cpp53
-rw-r--r--native/jni/src/unigram_dictionary.h30
-rw-r--r--tests/src/com/android/inputmethod/latin/SubtypeLocaleTests.java10
13 files changed, 176 insertions, 85 deletions
diff --git a/java/res/values/donottranslate.xml b/java/res/values/donottranslate.xml
index 4fe447e20..73aa54f7e 100644
--- a/java/res/values/donottranslate.xml
+++ b/java/res/values/donottranslate.xml
@@ -134,7 +134,9 @@
<item>5</item>
</string-array>
- <!-- Subtype locale display name exceptions -->
+ <!-- Subtype locale display name exceptions.
+ For each exception, there should be a related string resource for the display name that has
+ an explicit keyboard layout. The string resource name must be "subtype_with_layout_<locale>". -->
<string-array name="subtype_locale_exception_keys">
<item>en_US</item>
<item>en_GB</item>
diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml
index 69fdea79f..cb458a4ed 100644
--- a/java/res/values/strings.xml
+++ b/java/res/values/strings.xml
@@ -252,6 +252,12 @@
<string name="subtype_en_GB">English (UK)</string>
<!-- Description for English (United States) keyboard subtype [CHAR LIMIT=25] -->
<string name="subtype_en_US">English (US)</string>
+ <!-- Description for English (United Kingdom) keyboard subtype with explicit keyboard layout [CHAR LIMIT=25]
+ This should be identical to subtype_en_GB aside from the trailing (%s). -->
+ <string name="subtype_with_layout_en_GB">English (UK) (<xliff:g id="layout">%s</xliff:g>)</string>
+ <!-- Description for English (United States) keyboard subtype with explicit keyboard layout [CHAR LIMIT=25]
+ This should be identical to subtype_en_US aside from the trailing (%s). -->
+ <string name="subtype_with_layout_en_US">English (US) (<xliff:g id="layout">%s</xliff:g>)</string>
<!-- Description for language agnostic keyboard subtype [CHAR LIMIT=25] -->
<string name="subtype_no_language">No language</string>
<!-- Description for language agnostic QWERTY keyboard subtype [CHAR LIMIT=25] -->
diff --git a/java/src/com/android/inputmethod/latin/AdditionalSubtype.java b/java/src/com/android/inputmethod/latin/AdditionalSubtype.java
index 06d33154f..f0076a5b6 100644
--- a/java/src/com/android/inputmethod/latin/AdditionalSubtype.java
+++ b/java/src/com/android/inputmethod/latin/AdditionalSubtype.java
@@ -19,7 +19,9 @@ package com.android.inputmethod.latin;
import static com.android.inputmethod.latin.Constants.Subtype.KEYBOARD_MODE;
import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.IS_ADDITIONAL_SUBTYPE;
import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.KEYBOARD_LAYOUT_SET;
+import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME;
+import android.os.Build;
import android.view.inputmethod.InputMethodSubtype;
import java.util.ArrayList;
@@ -40,12 +42,22 @@ public class AdditionalSubtype {
public static InputMethodSubtype createAdditionalSubtype(
String localeString, String keyboardLayoutSetName, String extraValue) {
final String layoutExtraValue = KEYBOARD_LAYOUT_SET + "=" + keyboardLayoutSetName;
- final String filteredExtraValue = StringUtils.appendToCsvIfNotExists(
- IS_ADDITIONAL_SUBTYPE, extraValue);
+ final String layoutDisplayNameExtraValue;
+ if (Build.VERSION.SDK_INT >= /* JELLY_BEAN */ 15
+ && SubtypeLocale.isExceptionalLocale(localeString)) {
+ final String layoutDisplayName = SubtypeLocale.getKeyboardLayoutSetDisplayName(
+ keyboardLayoutSetName);
+ layoutDisplayNameExtraValue = StringUtils.appendToCsvIfNotExists(
+ UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME + "=" + layoutDisplayName, extraValue);
+ } else {
+ layoutDisplayNameExtraValue = extraValue;
+ }
+ final String additionalSubtypeExtraValue = StringUtils.appendToCsvIfNotExists(
+ IS_ADDITIONAL_SUBTYPE, layoutDisplayNameExtraValue);
final int nameId = SubtypeLocale.getSubtypeNameId(localeString, keyboardLayoutSetName);
return new InputMethodSubtype(nameId, R.drawable.ic_subtype_keyboard,
localeString, KEYBOARD_MODE,
- layoutExtraValue + "," + filteredExtraValue, false, false);
+ layoutExtraValue + "," + additionalSubtypeExtraValue, false, false);
}
public static String getPrefSubtype(InputMethodSubtype subtype) {
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index b205cc004..7c2284569 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -65,11 +65,6 @@ public final class Constants {
public static final class ExtraValue {
/**
- * The subtype extra value used to indicate that the subtype keyboard layout set name.
- */
- public static final String KEYBOARD_LAYOUT_SET = "KeyboardLayoutSet";
-
- /**
* The subtype extra value used to indicate that the subtype keyboard layout is capable
* for typing ASCII characters.
*/
@@ -82,6 +77,20 @@ public final class Constants {
public static final String REQ_NETWORK_CONNECTIVITY = "requireNetworkConnectivity";
/**
+ * The subtype extra value used to indicate that the subtype display name contains a "%s"
+ * replacement marker, which should be replaced by the value of this extra value.
+ * This extra value is supported on JellyBean and later.
+ */
+ public static final String UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME =
+ "UntranslatableReplacementStringInSubtypeName";
+
+ /**
+ * The subtype extra value used to specify the subtype keyboard layout set name.
+ * This extra value is private to LatinIME.
+ */
+ public static final String KEYBOARD_LAYOUT_SET = "KeyboardLayoutSet";
+
+ /**
* The subtype extra value used to indicate that the subtype is additional subtype
* that the user defined. This extra value is private to LatinIME.
*/
diff --git a/java/src/com/android/inputmethod/latin/SubtypeLocale.java b/java/src/com/android/inputmethod/latin/SubtypeLocale.java
index 7694b56fc..ca293060a 100644
--- a/java/src/com/android/inputmethod/latin/SubtypeLocale.java
+++ b/java/src/com/android/inputmethod/latin/SubtypeLocale.java
@@ -17,9 +17,12 @@
package com.android.inputmethod.latin;
import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.KEYBOARD_LAYOUT_SET;
+import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME;
import android.content.Context;
import android.content.res.Resources;
+import android.os.Build;
+import android.util.Log;
import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.latin.LocaleUtils.RunInLocale;
@@ -28,7 +31,7 @@ import java.util.HashMap;
import java.util.Locale;
public class SubtypeLocale {
- private static final String TAG = SubtypeLocale.class.getSimpleName();
+ static final String TAG = SubtypeLocale.class.getSimpleName();
// This class must be located in the same package as LatinIME.java.
private static final String RESOURCE_PACKAGE_NAME =
DictionaryFactory.class.getPackage().getName();
@@ -38,16 +41,20 @@ public class SubtypeLocale {
public static final String QWERTY = "qwerty";
public static final int UNKNOWN_KEYBOARD_LAYOUT = R.string.subtype_generic;
- private static Context sContext;
private static String[] sPredefinedKeyboardLayoutSet;
// Keyboard layout to its display name map.
- private static final HashMap<String, String> sKeyboardKayoutToDisplayNameMap =
+ private static final HashMap<String, String> sKeyboardLayoutToDisplayNameMap =
new HashMap<String, String>();
// Keyboard layout to subtype name resource id map.
private static final HashMap<String, Integer> sKeyboardLayoutToNameIdsMap =
new HashMap<String, Integer>();
+ // Exceptional locale to subtype name resource id map.
+ private static final HashMap<String, Integer> sExceptionalLocaleToWithLayoutNameIdsMap =
+ new HashMap<String, Integer>();
private static final String SUBTYPE_NAME_RESOURCE_GENERIC_PREFIX =
"string/subtype_generic_";
+ private static final String SUBTYPE_NAME_RESOURCE_WITH_LAYOUT_PREFIX =
+ "string/subtype_with_layout_";
private static final String SUBTYPE_NAME_RESOURCE_NO_LANGUAGE_PREFIX =
"string/subtype_no_language_";
// Exceptional locales to display name map.
@@ -59,7 +66,6 @@ public class SubtypeLocale {
}
public static void init(Context context) {
- sContext = context;
final Resources res = context.getResources();
final String[] predefinedLayoutSet = res.getStringArray(R.array.predefined_layouts);
@@ -68,7 +74,7 @@ public class SubtypeLocale {
R.array.predefined_layout_display_names);
for (int i = 0; i < predefinedLayoutSet.length; i++) {
final String layoutName = predefinedLayoutSet[i];
- sKeyboardKayoutToDisplayNameMap.put(layoutName, layoutDisplayNames[i]);
+ sKeyboardLayoutToDisplayNameMap.put(layoutName, layoutDisplayNames[i]);
final String resourceName = SUBTYPE_NAME_RESOURCE_GENERIC_PREFIX + layoutName;
final int resId = res.getIdentifier(resourceName, null, RESOURCE_PACKAGE_NAME);
sKeyboardLayoutToNameIdsMap.put(layoutName, resId);
@@ -85,7 +91,11 @@ public class SubtypeLocale {
final String[] exceptionalDisplayNames = res.getStringArray(
R.array.subtype_locale_exception_values);
for (int i = 0; i < exceptionalLocales.length; i++) {
- sExceptionalDisplayNamesMap.put(exceptionalLocales[i], exceptionalDisplayNames[i]);
+ final String localeString = exceptionalLocales[i];
+ sExceptionalDisplayNamesMap.put(localeString, exceptionalDisplayNames[i]);
+ final String resourceName = SUBTYPE_NAME_RESOURCE_WITH_LAYOUT_PREFIX + localeString;
+ final int resId = res.getIdentifier(resourceName, null, RESOURCE_PACKAGE_NAME);
+ sExceptionalLocaleToWithLayoutNameIdsMap.put(localeString, resId);
}
}
@@ -93,11 +103,18 @@ public class SubtypeLocale {
return sPredefinedKeyboardLayoutSet;
}
+ public static boolean isExceptionalLocale(String localeString) {
+ return sExceptionalLocaleToWithLayoutNameIdsMap.containsKey(localeString);
+ }
+
private static final String getNoLanguageLayoutKey(String keyboardLayoutName) {
return NO_LANGUAGE + "_" + keyboardLayoutName;
}
public static int getSubtypeNameId(String localeString, String keyboardLayoutName) {
+ if (Build.VERSION.SDK_INT >= /* JELLY_BEAN */ 15 && isExceptionalLocale(localeString)) {
+ return sExceptionalLocaleToWithLayoutNameIdsMap.get(localeString);
+ }
final String key = localeString.equals(NO_LANGUAGE)
? getNoLanguageLayoutKey(keyboardLayoutName)
: keyboardLayoutName;
@@ -129,16 +146,26 @@ public class SubtypeLocale {
// en_US azerty T English (US) (AZERTY)
// zz azerty T No language (AZERTY) in system locale
- public static String getSubtypeDisplayName(InputMethodSubtype subtype, Resources res) {
- // TODO: Remove this check when InputMethodManager.getLastInputMethodSubtype is
- // fixed.
- if (!ImfUtils.checkIfSubtypeBelongsToThisIme(sContext, subtype)) return "";
- final String language = getSubtypeLocaleDisplayName(subtype.getLocale());
+ public static String getSubtypeDisplayName(final InputMethodSubtype subtype, Resources res) {
+ final String replacementString = (Build.VERSION.SDK_INT >= /* JELLY_BEAN */ 15
+ && subtype.containsExtraValueKey(UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME))
+ ? subtype.getExtraValueOf(UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME)
+ : getSubtypeLocaleDisplayName(subtype.getLocale());
final int nameResId = subtype.getNameResId();
final RunInLocale<String> getSubtypeName = new RunInLocale<String>() {
@Override
protected String job(Resources res) {
- return res.getString(nameResId, language);
+ try {
+ return res.getString(nameResId, replacementString);
+ } catch (Resources.NotFoundException e) {
+ // TODO: Remove this catch when InputMethodManager.getCurrentInputMethodSubtype
+ // is fixed.
+ Log.w(TAG, "Unknown subtype: mode=" + subtype.getMode()
+ + " locale=" + subtype.getLocale()
+ + " extra=" + subtype.getExtraValue()
+ + "\n" + Utils.getStackTrace());
+ return "";
+ }
}
};
final Locale locale = isNoLanguage(subtype)
@@ -158,7 +185,11 @@ public class SubtypeLocale {
public static String getKeyboardLayoutSetDisplayName(InputMethodSubtype subtype) {
final String layoutName = getKeyboardLayoutSetName(subtype);
- return sKeyboardKayoutToDisplayNameMap.get(layoutName);
+ return getKeyboardLayoutSetDisplayName(layoutName);
+ }
+
+ public static String getKeyboardLayoutSetDisplayName(String layoutName) {
+ return sKeyboardLayoutToDisplayNameMap.get(layoutName);
}
public static String getKeyboardLayoutSetName(InputMethodSubtype subtype) {
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 67f96281d..220b340d1 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -153,8 +153,19 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
return pos;
}
-void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
- const int prevWordLength, std::map<int, int> *map) {
+static inline void setInFilter(uint8_t *filter, const int position) {
+ const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
+ filter[bucket >> 3] |= (1 << (bucket & 0x7));
+}
+
+static inline bool isInFilter(uint8_t *filter, const int position) {
+ const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
+ return filter[bucket >> 3] & (1 << (bucket & 0x7));
+}
+
+void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
+ const int prevWordLength, std::map<int, int> *map, uint8_t *filter) {
+ memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
const uint8_t* const root = DICT;
int pos = getBigramListPositionForWord(prevWord, prevWordLength);
if (0 == pos) return;
@@ -166,6 +177,7 @@ void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
(*map)[bigramPos] = frequency;
+ setInFilter(filter, bigramPos);
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
}
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index b1233215b..7328d5828 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -20,6 +20,8 @@
#include <map>
#include <stdint.h>
+#include "defines.h"
+
namespace latinime {
class Dictionary;
@@ -29,8 +31,8 @@ class BigramDictionary {
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
- void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
- std::map<int, int> *map);
+ void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
+ std::map<int, int> *map, uint8_t *filter);
~BigramDictionary();
private:
bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index d5d67c108..71ade48a3 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -66,7 +66,8 @@ class BinaryFormat {
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
uint16_t* outWord);
- static int getProbability(const int bigramListPosition, const int unigramFreq);
+ static int getProbability(const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const int unigramFreq);
// Flags for special processing
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -519,9 +520,11 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
}
// This should probably return a probability in log space.
-inline int BinaryFormat::getProbability(const int bigramListPosition, const int unigramFreq) {
- // TODO: use the bigram list position to get the bigram probability. If the bigram
- // is not found, use the unigram frequency.
+inline int BinaryFormat::getProbability(const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, const int unigramFreq) {
+ // TODO: use the bigram filter for fast rejection, then the bigram map for lookup
+ // to get the bigram probability. If the bigram is not found, use the unigram frequency.
+ // Don't forget that they can be null.
// TODO: if the unigram frequency is used, compute the actual probability
return unigramFreq;
}
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index c99f8a8b2..cb3dbb115 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -241,6 +241,24 @@ static inline void prof_out(void) {
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
+// Size, in bytes, of the bloom filter index for bigrams
+// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
+// where k is the number of hash functions, n the number of bigrams, and m the number of
+// bits we can test.
+// At the moment 100 is the maximum number of bigrams for a word with the current
+// dictionaries, so n = 100. 1024 buckets give us m = 1024.
+// With 1 hash function, our false positive rate is about 9.3%, which should be enough for
+// our uses since we are only using this to increase average performance. For the record,
+// k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%,
+// and m = 4096 gives 2.4%.
+#define BIGRAM_FILTER_BYTE_SIZE 128
+// Must not exceed BIGRAM_FILTER_BYTE_SIZE * 8 (the number of bits in the filter), and is
+// preferably prime. 1021 is the largest prime under 128 * 8.
+#define BIGRAM_FILTER_MODULO 1021
+#if BIGRAM_FILTER_BYTE_SIZE * 8 < BIGRAM_FILTER_MODULO
+#error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE"
+#endif
+
template<typename T> inline T min(T a, T b) { return a < b ? a : b; }
template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index a2b0491c5..bce86d1ad 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -37,16 +37,13 @@ class Dictionary {
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength,
bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
- // bigramListPosition is, as an int, the offset of the bigram list in the file.
- // If none, it's zero.
- const int bigramListPosition = !prevWordChars ? 0
- : mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
std::map<int, int> bigramMap;
- mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
- &bigramMap);
+ uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
+ mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
+ prevWordLength, &bigramMap, bigramFilter);
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
- mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
- useFullEditDistance, outWords, frequencies);
+ mCorrection, xcoordinates, ycoordinates, codes, codesSize, &bigramMap,
+ bigramFilter, useFullEditDistance, outWords, frequencies);
}
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 0c759d438..2e5468dd7 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -98,7 +98,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
int *xCoordinatesBuffer, int *yCoordinatesBuffer,
- const int codesBufferSize, const int bigramListPosition,
+ const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, const int *codesSrc,
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool,
@@ -128,7 +128,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
replacementCodePoint;
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
- bigramListPosition, useFullEditDistance, codesSrc + i + 1,
+ bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
queuePool, digraphs, digraphsSize);
@@ -138,7 +138,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
- bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i,
+ bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
digraphsSize);
return;
@@ -161,16 +161,18 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
}
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
- startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction,
+ startIndex + codesRemain, bigramMap, bigramFilter, useFullEditDistance, correction,
queuePool);
}
-// bigramListPosition is the offset in the file to the list of bigrams for the previous word.
+// bigramMap contains the association <bigram address> -> <bigram frequency>
+// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
+// in bigram_dictionary.cpp
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
- const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords,
- int *frequencies) {
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
queuePool->clearAll();
Correction* masterCorrection = correction;
@@ -180,7 +182,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
int xCoordinatesBuffer[codesSize];
int yCoordinatesBuffer[codesSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
+ xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
queuePool, GERMAN_UMLAUT_DIGRAPHS,
sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
@@ -189,13 +191,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
int xCoordinatesBuffer[codesSize];
int yCoordinatesBuffer[codesSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
+ xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
queuePool, FRENCH_LIGATURES_DIGRAPHS,
sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
} else { // Normal processing
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
- bigramListPosition, useFullEditDistance, masterCorrection, queuePool);
+ bigramMap, bigramFilter, useFullEditDistance, masterCorrection, queuePool);
}
PROF_START(20);
@@ -228,15 +230,15 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const int inputLength, const int bigramListPosition, const bool useFullEditDistance,
- Correction *correction, WordsPriorityQueuePool *queuePool) {
+ const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) {
PROF_OPEN;
PROF_START(0);
PROF_END(0);
PROF_START(1);
- getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition,
+ getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter,
useFullEditDistance, inputLength, correction, queuePool);
PROF_END(1);
@@ -308,15 +310,16 @@ static const char SPACE = ' ';
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const int bigramListPosition, const bool useFullEditDistance, const int inputLength,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool *queuePool) {
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
- getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction,
+ getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction,
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
}
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
- const int inputLength, const int bigramListPosition,
+ const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
// TODO: Remove setCorrectionParams
@@ -337,7 +340,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos,
+ bigramMap, bigramFilter, correction, &childCount, &firstChildPos, &siblingPos,
queuePool, currentWordIndex);
// Update next sibling pos
correction->setTreeSiblingPos(outputIndex, siblingPos);
@@ -432,8 +435,8 @@ bool UnigramDictionary::getSubStringSuggestion(
queuePool->clearSubQueue(currentWordIndex);
// TODO: pass the bigram list for substring suggestion
getSuggestionCandidates(useFullEditDistance, inputWordLength,
- 0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */,
- MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
+ 0 /* bigramMap */, 0 /* bigramFilter */, correction, queuePool,
+ false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
if (DEBUG_DICT) {
if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
@@ -763,9 +766,9 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
- const int bigramListPosition, Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
- const int currentWordIndex) {
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
+ int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
+ WordsPriorityQueuePool *queuePool, const int currentWordIndex) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -846,9 +849,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
- // The bigramListPosition is the offset in the file of the bigrams for the previous word,
- // or zero if we don't know of any bigrams for it.
- const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq);
+ // bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
+ // bigramFilter is a bloom filter of said addresses for even faster rejection.
+ const int probability = BinaryFormat::getProbability(bigramMap, bigramFilter, unigramFreq);
onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
currentWordIndex);
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 0cc59bac8..b9233518f 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -17,6 +17,7 @@
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
#define LATINIME_UNIGRAM_DICTIONARY_H
+#include <map>
#include <stdint.h>
#include "correction.h"
#include "correction_state.h"
@@ -75,32 +76,36 @@ class UnigramDictionary {
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
Correction *correction, const int *xcoordinates, const int *ycoordinates,
- const int *codes, const int codesSize, const int bigramListPosition,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies);
+ const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
+ int *frequencies);
virtual ~UnigramDictionary();
private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int inputLength,
- const int bigramListPosition, const bool useFullEditDistance, Correction *correction,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool);
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
const digraph_t* const digraphs, const unsigned int digraphsSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
- const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc,
- const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, const int* codesSrc, const int codesRemain,
+ const int currentDepth, int* codesDest, Correction *correction,
WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
const unsigned int digraphsSize);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int bigramListPosition,
- const bool useFullEditDistance, const int inputLength, Correction *correction,
- WordsPriorityQueuePool* queuePool);
+ const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength,
+ Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates(
- const bool useFullEditDistance, const int inputLength, const int bigramListPosition,
+ const bool useFullEditDistance, const int inputLength,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
const int maxErrors, const int currentWordIndex);
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
@@ -114,9 +119,10 @@ class UnigramDictionary {
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
- bool processCurrentNode(const int initialPos, const int bigramListPosition,
- Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition,
- WordsPriorityQueuePool *queuePool, const int currentWordIndex);
+ bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, Correction *correction, int *newCount,
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
+ const int currentWordIndex);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
diff --git a/tests/src/com/android/inputmethod/latin/SubtypeLocaleTests.java b/tests/src/com/android/inputmethod/latin/SubtypeLocaleTests.java
index 8863bcf47..c70c2fde5 100644
--- a/tests/src/com/android/inputmethod/latin/SubtypeLocaleTests.java
+++ b/tests/src/com/android/inputmethod/latin/SubtypeLocaleTests.java
@@ -30,14 +30,12 @@ public class SubtypeLocaleTests extends AndroidTestCase {
// Locale to subtypes list.
private final ArrayList<InputMethodSubtype> mSubtypesList = new ArrayList<InputMethodSubtype>();
- private Context mContext;
private Resources mRes;
@Override
protected void setUp() throws Exception {
super.setUp();
final Context context = getContext();
- mContext = context;
mRes = context.getResources();
SubtypeLocale.init(context);
}
@@ -124,10 +122,6 @@ public class SubtypeLocaleTests extends AndroidTestCase {
final InputMethodSubtype ZZ_AZERTY = AdditionalSubtype.createAdditionalSubtype(
SubtypeLocale.NO_LANGUAGE, "azerty", null);
- ImfUtils.setAdditionalInputMethodSubtypes(mContext, new InputMethodSubtype[] {
- DE_QWERTY, FR_QWERTZ, US_AZERTY, ZZ_AZERTY
- });
-
final RunInLocale<Void> tests = new RunInLocale<Void>() {
@Override
protected Void job(Resources res) {
@@ -191,10 +185,6 @@ public class SubtypeLocaleTests extends AndroidTestCase {
final InputMethodSubtype ZZ_AZERTY = AdditionalSubtype.createAdditionalSubtype(
SubtypeLocale.NO_LANGUAGE, "azerty", null);
- ImfUtils.setAdditionalInputMethodSubtypes(mContext, new InputMethodSubtype[] {
- DE_QWERTY, FR_QWERTZ, US_AZERTY, ZZ_AZERTY
- });
-
final RunInLocale<Void> tests = new RunInLocale<Void>() {
@Override
protected Void job(Resources res) {