aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java37
-rw-r--r--java/src/com/android/inputmethod/latin/utils/WordProperty.java54
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp12
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.cpp15
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.h4
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java65
6 files changed, 110 insertions, 77 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 95823dac5..6e0cdf2b1 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -68,11 +68,12 @@ public final class BinaryDictionary extends Dictionary {
private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
- // Format to get unigram historical info from native side via getWordPropertyNative().
- private static final int FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
- private static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 0;
- private static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 1;
- private static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 2;
+ // Format to get probability and historical info from native side via getWordPropertyNative().
+ public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
+ public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
+ public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
+ public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
+ public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;
private long mNativeDict;
private final Locale mLocale;
@@ -144,9 +145,9 @@ public final class BinaryDictionary extends Dictionary {
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
private static native void getWordPropertyNative(long dict, int[] word,
- int[] outCodePoints, boolean[] outFlags, int[] outProbability,
- int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
- ArrayList<Integer> outShortcutProbabilities);
+ int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
+ ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
+ ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
private static native int getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -313,22 +314,22 @@ public final class BinaryDictionary extends Dictionary {
final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] outCodePoints = new int[MAX_WORD_LENGTH];
final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
- final int[] outProbability = new int[1];
- final int[] outHistoricalInfo =
- new int[FORMAT_WORD_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
+ final int[] outProbabilityInfo =
+ new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
+ final ArrayList<int[]> outBigramTargets = CollectionUtils.newArrayList();
+ final ArrayList<int[]> outBigramProbabilityInfo = CollectionUtils.newArrayList();
final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
- getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
- outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+ getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
return new WordProperty(codePoints,
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
- outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
- outHistoricalInfo[FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
- outHistoricalInfo[FORMAT_WORD_PROPERTY_LEVEL_INDEX],
- outHistoricalInfo[FORMAT_WORD_PROPERTY_COUNT_INDEX],
- outShortcutTargets, outShortcutProbabilities);
+ outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
}
// Add a unigram entry to binary dictionary with unigram attributes in native code.
diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
index d6c0f900a..ba9b114b0 100644
--- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
@@ -32,15 +32,30 @@ public class WordProperty {
public final boolean mIsBlacklisted;
public final boolean mHasBigrams;
public final boolean mHasShortcuts;
- public final int mProbability;
- // mTimestamp, mLevel and mCount are historical info. These values are depend on the
- // implementation in native code; thus, we must not use them and have any assumptions about
- // them except for tests.
- public final int mTimestamp;
- public final int mLevel;
- public final int mCount;
+ public final ProbabilityInfo mProbabilityInfo;
+ public final ArrayList<WeightedString> mBigramTargets = CollectionUtils.newArrayList();
+ public final ArrayList<ProbabilityInfo> mBigramProbabilityInfo = CollectionUtils.newArrayList();
public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
+ // TODO: Use this kind of Probability class for dictionary read/write code under the makedict
+ // package.
+ public static final class ProbabilityInfo {
+ public final int mProbability;
+ // mTimestamp, mLevel and mCount are historical info. These values depend on the
+ // implementation in native code; thus, we must not use them or make any assumptions about
+ // them except in tests.
+ public final int mTimestamp;
+ public final int mLevel;
+ public final int mCount;
+
+ public ProbabilityInfo(final int[] probabilityInfo) {
+ mProbability = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX];
+ mTimestamp = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX];
+ mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
+ mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
+ }
+ }
+
private static int getCodePointCount(final int[] codePoints) {
for (int i = 0; i < codePoints.length; i++) {
if (codePoints[i] == 0) {
@@ -53,18 +68,29 @@ public class WordProperty {
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
public WordProperty(final int[] codePoints, final boolean isNotAWord,
final boolean isBlacklisted, final boolean hasBigram,
- final boolean hasShortcuts, final int probability, final int timestamp,
- final int level, final int count, final ArrayList<int[]> shortcutTargets,
+ final boolean hasShortcuts, final int[] probabilityInfo,
+ final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
+ final ArrayList<int[]> shortcutTargets,
final ArrayList<Integer> shortcutProbabilities) {
mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
mIsNotAWord = isNotAWord;
mIsBlacklisted = isBlacklisted;
mHasBigrams = hasBigram;
mHasShortcuts = hasShortcuts;
- mProbability = probability;
- mTimestamp = timestamp;
- mLevel = level;
- mCount = count;
+ mProbabilityInfo = new ProbabilityInfo(probabilityInfo);
+
+ final int bigramTargetCount = bigramTargets.size();
+ for (int i = 0; i < bigramTargetCount; i++) {
+ final int[] bigramTargetCodePointArray = bigramTargets.get(i);
+ final String bigramTargetString = new String(bigramTargetCodePointArray,
+ 0 /* offset */, getCodePointCount(bigramTargetCodePointArray));
+ final ProbabilityInfo bigramProbability =
+ new ProbabilityInfo(bigramProbabilityInfo.get(i));
+ mBigramTargets.add(
+ new WeightedString(bigramTargetString, bigramProbability.mProbability));
+ mBigramProbabilityInfo.add(bigramProbability);
+ }
+
final int shortcutTargetCount = shortcutTargets.size();
for (int i = 0; i < shortcutTargetCount; i++) {
final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
@@ -77,6 +103,6 @@ public class WordProperty {
@UsedForTesting
public boolean isValid() {
- return mProbability != BinaryDictionary.NOT_A_PROBABILITY;
+ return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
}
} \ No newline at end of file
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 16a3fe825..8f3f8e21e 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -262,16 +262,17 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) {
+ jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
- wordProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
- outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+ wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -521,7 +522,8 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getWordPropertyNative"),
- const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
+ const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
+ "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp
index 4a260a982..d8c330bbd 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/word_property.cpp
@@ -19,20 +19,23 @@
namespace latinime {
void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
- jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) const {
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+ jobject outBigramProbabilities, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) const {
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
- env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
- int historicalInfo[] = {mTimestamp, mLevel, mCount};
- env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
- historicalInfo);
+ int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
+ env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+ probabilityInfo);
jclass integerClass = env->FindClass("java/lang/Integer");
jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+
+ // TODO: Output bigrams.
+ // Output shortcuts.
const int shortcutTargetCount = mShortcuts.size();
for (int i = 0; i < shortcutTargetCount; ++i) {
const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h
index 69c880861..cc06b1baa 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/word_property.h
@@ -78,8 +78,8 @@ class WordProperty {
mShortcuts(*shortcuts) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) const;
+ jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 844fcbbd9..5294bb006 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -871,11 +871,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
}
- public void testGetUnigramProperties() {
- testGetUnigramProperties(FormatSpec.VERSION4);
+ public void testGetWordProperties() {
+ testGetWordProperties(FormatSpec.VERSION4);
}
- private void testGetUnigramProperties(final int formatVersion) {
+ private void testGetWordProperties(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int ITERATION_COUNT = 1000;
@@ -892,8 +892,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final WordProperty invalidUnigramProperty = binaryDictionary.getWordProperty("dummyWord");
- assertFalse(invalidUnigramProperty.isValid());
+ final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
+ assertFalse(invalidWordProperty.isValid());
for (int i = 0; i < ITERATION_COUNT; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -904,15 +904,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramWord(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
- final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
- assertEquals(word, unigramProperty.mCodePoints);
- assertTrue(unigramProperty.isValid());
- assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
- assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
- assertEquals(false, unigramProperty.mHasBigrams);
- assertEquals(false, unigramProperty.mHasShortcuts);
- assertEquals(unigramProbability, unigramProperty.mProbability);
- assertTrue(unigramProperty.mShortcutTargets.isEmpty());
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals(word, wordProperty.mCodePoints);
+ assertTrue(wordProperty.isValid());
+ assertEquals(isNotAWord, wordProperty.mIsNotAWord);
+ assertEquals(isBlacklisted, wordProperty.mIsBlacklisted);
+ assertEquals(false, wordProperty.mHasBigrams);
+ assertEquals(false, wordProperty.mHasShortcuts);
+ assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
+ assertTrue(wordProperty.mShortcutTargets.isEmpty());
}
}
@@ -936,28 +936,28 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
- WordProperty unigramProperty = binaryDictionary.getWordProperty("aaa");
- assertEquals(1, unigramProperty.mShortcutTargets.size());
- assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
- assertEquals(shortcutProbability, unigramProperty.mShortcutTargets.get(0).mFrequency);
+ WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+ assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency);
final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
- unigramProperty = binaryDictionary.getWordProperty("aaa");
- assertEquals(1, unigramProperty.mShortcutTargets.size());
- assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
- unigramProperty.mShortcutTargets.get(0).mFrequency);
+ wordProperty.mShortcutTargets.get(0).mFrequency);
binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
- unigramProperty = binaryDictionary.getWordProperty("aaa");
- assertEquals(2, unigramProperty.mShortcutTargets.size());
- for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
shortcutTargets.remove(shortcutTarget.mWord);
@@ -965,9 +965,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
binaryDictionary.flushWithGC();
- unigramProperty = binaryDictionary.getWordProperty("aaa");
- assertEquals(2, unigramProperty.mShortcutTargets.size());
- for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
shortcutTargets.remove(shortcutTarget.mWord);
@@ -1034,14 +1034,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
for (final String word : words) {
- final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
- assertEquals((int)unigramProbabilities.get(word), unigramProperty.mProbability);
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals((int)unigramProbabilities.get(word),
+ wordProperty.mProbabilityInfo.mProbability);
if (!shortcutTargets.containsKey(word)) {
// The word does not have shortcut targets.
continue;
}
- assertEquals(shortcutTargets.get(word).size(), unigramProperty.mShortcutTargets.size());
- for (final WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
+ assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
final String targetCodePonts = shortcutTarget.mWord;
assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
shortcutTarget.mFrequency);