aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2013-12-06 08:53:21 +0000
committer Android (Google) Code Review <android-gerrit@google.com> 2013-12-06 08:53:21 +0000
commit fc16c320c6e759974308272e6c66a6921141b384 (patch)
tree 7d1516cfced6ac0a7ae4811d1cfc0c637d393627 /java/src
parent 5bf55a86d0dd55ade42833fdb7cf654b2aeddb4e (diff)
parent 9fb28f78f7b67bb1ab7412e0abfd7d104004b623 (diff)
download latinime-fc16c320c6e759974308272e6c66a6921141b384.tar.gz
latinime-fc16c320c6e759974308272e6c66a6921141b384.tar.xz
latinime-fc16c320c6e759974308272e6c66a6921141b384.zip
Merge "Add a jni method to fetch unigram information."
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java 44
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/UnigramProperty.java 82
2 files changed, 126 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index a41cc6a82..db4234c63 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -26,6 +26,7 @@ import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.StringUtils;
+import com.android.inputmethod.latin.utils.UnigramProperty;
import java.io.File;
import java.util.ArrayList;
@@ -59,6 +60,19 @@ public final class BinaryDictionary extends Dictionary {
public static final int NOT_A_VALID_TIMESTAMP = -1;
+ // Format to get unigram flags from the native side via getUnigramPropertyNative().
+ private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
+ private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
+ private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
+ private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
+ private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
+
+ // Format to get unigram historical info from the native side via getUnigramPropertyNative().
+ private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
+ private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
+ private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
+ private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
+
private long mNativeDict;
private final Locale mLocale;
private final long mDictSize;
@@ -128,6 +142,10 @@ public final class BinaryDictionary extends Dictionary {
private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+ private static native void getUnigramPropertyNative(long dict, int[] word,
+ int[] outCodePoints, boolean[] outFlags, int[] outProbability,
+ int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
+ ArrayList<Integer> outShortcutProbabilities);
private static native int getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -285,6 +303,32 @@ public final class BinaryDictionary extends Dictionary {
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
}
+ @UsedForTesting
+ public UnigramProperty getUnigramProperty(final String word) {
+ if (TextUtils.isEmpty(word)) {
+ return null;
+ }
+ final int[] codePoints = StringUtils.toCodePointArray(word);
+ final int[] outCodePoints = new int[MAX_WORD_LENGTH];
+ final boolean[] outFlags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
+ final int[] outProbability = new int[1];
+ final int[] outHistoricalInfo =
+ new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
+ final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
+ final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
+ getUnigramPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
+ outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+ return new UnigramProperty(codePoints,
+ outFlags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX],
+ outFlags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX],
+ outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX],
+ outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
+ outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX],
+ outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX],
+ outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX],
+ outShortcutTargets, outShortcutProbabilities);
+ }
+
// Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramWord(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
diff --git a/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java b/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java
new file mode 100644
index 000000000..4feee4393
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package com.android.inputmethod.latin.utils;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.util.ArrayList;
+
+// Holds the information that belongs to a unigram. Some detailed attributes, such as historical
+// information, are exposed here but must only be checked for testing purposes.
+@UsedForTesting
+public class UnigramProperty {
+ public final String mCodePoints;
+ public final boolean mIsNotAWord;
+ public final boolean mIsBlacklisted;
+ public final boolean mHasBigrams;
+ public final boolean mHasShortcuts;
+ public final int mProbability;
+ // mTimestamp, mLevel and mCount are historical info. These values depend on the
+ // implementation in native code; thus, we must not use them or make any assumptions about
+ // them except in tests.
+ public final int mTimestamp;
+ public final int mLevel;
+ public final int mCount;
+ public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
+
+ private static int getCodePointCount(final int[] codePoints) {
+ for (int i = 0; i < codePoints.length; i++) {
+ if (codePoints[i] == 0) {
+ return i;
+ }
+ }
+ return codePoints.length;
+ }
+
+ // Represents an invalid unigram when the probability is BinaryDictionary.NOT_A_PROBABILITY.
+ public UnigramProperty(final int[] codePoints, final boolean isNotAWord,
+ final boolean isBlacklisted, final boolean hasBigram,
+ final boolean hasShortcuts, final int probability, final int timestamp,
+ final int level, final int count, final ArrayList<int[]> shortcutTargets,
+ final ArrayList<Integer> shortcutProbabilities) {
+ mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
+ mIsNotAWord = isNotAWord;
+ mIsBlacklisted = isBlacklisted;
+ mHasBigrams = hasBigram;
+ mHasShortcuts = hasShortcuts;
+ mProbability = probability;
+ mTimestamp = timestamp;
+ mLevel = level;
+ mCount = count;
+ final int shortcutTargetCount = shortcutTargets.size();
+ for (int i = 0; i < shortcutTargetCount; i++) {
+ final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
+ final String shortcutTargetString = new String(shortcutTargetCodePointArray,
+ 0 /* offset */, getCodePointCount(shortcutTargetCodePointArray));
+ mShortcutTargets.add(
+ new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
+ }
+ }
+
+ @UsedForTesting
+ public boolean isValid() {
+ return mProbability != BinaryDictionary.NOT_A_PROBABILITY;
+ }
+}
\ No newline at end of file