aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java44
-rw-r--r--java/src/com/android/inputmethod/latin/utils/UnigramProperty.java82
-rw-r--r--native/jni/Android.mk3
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp22
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp7
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.cpp52
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.h87
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp41
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h3
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java49
13 files changed, 403 insertions, 2 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index a41cc6a82..db4234c63 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -26,6 +26,7 @@ import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.StringUtils;
+import com.android.inputmethod.latin.utils.UnigramProperty;
import java.io.File;
import java.util.ArrayList;
@@ -59,6 +60,19 @@ public final class BinaryDictionary extends Dictionary {
public static final int NOT_A_VALID_TIMESTAMP = -1;
+ // Layout of the boolean flag array filled by getUnigramPropertyNative(). The indices must
+ // match the order in which the native UnigramProperty::outputProperties() writes the flags:
+ // {isNotAWord, isBlacklisted, hasBigrams, hasShortcuts}.
+ private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
+ private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
+ private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
+ private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
+ private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
+
+ // Layout of the historical info array filled by getUnigramPropertyNative(). The indices must
+ // match the order written by the native side: {timestamp, level, count}.
+ private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
+ private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
+ private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
+ private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
+
private long mNativeDict;
private final Locale mLocale;
private final long mDictSize;
@@ -128,6 +142,10 @@ public final class BinaryDictionary extends Dictionary {
private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+ // Looks up word in the native dictionary and writes its unigram attributes into the out
+ // parameters. outFlags and outHistoricalInfo use the FORMAT_UNIGRAM_PROPERTY_* layouts; one
+ // entry is appended to each of outShortcutTargets and outShortcutProbabilities per shortcut.
+ // Nothing is written when dict is 0.
+ private static native void getUnigramPropertyNative(long dict, int[] word,
+ int[] outCodePoints, boolean[] outFlags, int[] outProbability,
+ int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
+ ArrayList<Integer> outShortcutProbabilities);
private static native int getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -285,6 +303,32 @@ public final class BinaryDictionary extends Dictionary {
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
}
+    /**
+     * Fetches the attributes that the native dictionary stores for a unigram.
+     *
+     * @param word the word to look up.
+     * @return {@code null} when {@code word} is null or empty; otherwise a UnigramProperty built
+     *         from the values reported by the native side. Use
+     *         {@link UnigramProperty#isValid()} to find out whether the word was actually found.
+     */
+    @UsedForTesting
+    public UnigramProperty getUnigramProperty(final String word) {
+        if (TextUtils.isEmpty(word)) {
+            return null;
+        }
+        final int[] codePoints = StringUtils.toCodePointArray(word);
+        final int[] outCodePoints = new int[MAX_WORD_LENGTH];
+        final boolean[] outFlags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
+        // The native side returns without touching the out parameters when the dictionary handle
+        // is 0. Start from NOT_A_PROBABILITY instead of the array default of 0; otherwise such a
+        // result would pass UnigramProperty.isValid() as a word with probability 0.
+        final int[] outProbability = new int[] { NOT_A_PROBABILITY };
+        final int[] outHistoricalInfo =
+                new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
+        final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
+        final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
+        getUnigramPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
+                outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+        // The property is built from the code points of the queried word, not from
+        // outCodePoints.
+        return new UnigramProperty(codePoints,
+                outFlags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX],
+                outFlags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX],
+                outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX],
+                outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
+                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX],
+                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX],
+                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX],
+                outShortcutTargets, outShortcutProbabilities);
+    }
+
// Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramWord(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
diff --git a/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java b/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java
new file mode 100644
index 000000000..4feee4393
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/UnigramProperty.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package com.android.inputmethod.latin.utils;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.util.ArrayList;
+
+// This has information that belong to a unigram. This class has some detailed attributes such as
+// historical information but they have to be checked only for testing purpose.
+@UsedForTesting
+public class UnigramProperty {
+ public final String mCodePoints;
+ public final boolean mIsNotAWord;
+ public final boolean mIsBlacklisted;
+ public final boolean mHasBigrams;
+ public final boolean mHasShortcuts;
+ public final int mProbability;
+ // mTimestamp, mLevel and mCount are historical info. These values are depend on the
+ // implementation in native code; thus, we must not use them and have any assumptions about
+ // them except for tests.
+ public final int mTimestamp;
+ public final int mLevel;
+ public final int mCount;
+ public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
+
+ private static int getCodePointCount(final int[] codePoints) {
+ for (int i = 0; i < codePoints.length; i++) {
+ if (codePoints[i] == 0) {
+ return i;
+ }
+ }
+ return codePoints.length;
+ }
+
+ // This represents invalid unigram when the probability is BinaryDictionary.NOT_A_PROBABILITY.
+ public UnigramProperty(final int[] codePoints, final boolean isNotAWord,
+ final boolean isBlacklisted, final boolean hasBigram,
+ final boolean hasShortcuts, final int probability, final int timestamp,
+ final int level, final int count, final ArrayList<int[]> shortcutTargets,
+ final ArrayList<Integer> shortcutProbabilities) {
+ mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
+ mIsNotAWord = isNotAWord;
+ mIsBlacklisted = isBlacklisted;
+ mHasBigrams = hasBigram;
+ mHasShortcuts = hasShortcuts;
+ mProbability = probability;
+ mTimestamp = timestamp;
+ mLevel = level;
+ mCount = count;
+ final int shortcutTargetCount = shortcutTargets.size();
+ for (int i = 0; i < shortcutTargetCount; i++) {
+ final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
+ final String shortcutTargetString = new String(shortcutTargetCodePointArray,
+ 0 /* offset */, getCodePointCount(shortcutTargetCodePointArray));
+ mShortcutTargets.add(
+ new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
+ }
+ }
+
+ @UsedForTesting
+ public boolean isValid() {
+ return mProbability != BinaryDictionary.NOT_A_PROBABILITY;
+ }
+} \ No newline at end of file
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 52ac333c4..f2c6d3bec 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
dictionary.cpp \
digraph_utils.cpp \
error_type_utils.cpp \
- multi_bigram_map.cpp) \
+ multi_bigram_map.cpp \
+ unigram_property.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
proximity_info.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 71b74b24f..716bda5a7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -24,6 +24,7 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
@@ -258,6 +259,21 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length);
}
+// JNI entry point for BinaryDictionary.getUnigramPropertyNative(). Copies the queried word out
+// of the Java array, asks the dictionary for its unigram property and writes the result into
+// the Java-side output containers. Does nothing when the dictionary handle is null.
+static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
+        jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
+        jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
+        jobject outShortcutProbabilities) {
+    Dictionary *const dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return;
+    }
+    const jsize codePointCount = env->GetArrayLength(word);
+    int codePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
+    dictionary->getUnigramProperty(codePoints, codePointCount).outputProperties(env,
+            outCodePoints, outFlags, outProbability, outHistoricalInfo, outShortcutTargets,
+            outShortcutProbabilities);
+}
+
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before);
@@ -333,7 +349,6 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length);
}
-
// Returns how many language model params are processed.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray languageModelParams, jint startIndex) {
@@ -494,6 +509,11 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
+ const_cast<char *>("getUnigramPropertyNative"),
+ const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
+ },
+ {
const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 4ee5a5ed3..e68c0a6d8 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -143,6 +143,13 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength);
}
+// Refreshes the TimeKeeper's current time, then forwards the lookup to the dictionary
+// structure policy.
+const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
+        const int codePointCount) {
+    TimeKeeper::setCurrentTime();
+    const DictionaryStructureWithBufferPolicy *const structurePolicy =
+            getDictionaryStructurePolicy();
+    return structurePolicy->getUnigramProperty(codePoints, codePointCount);
+}
+
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 122e4fc4f..b37b4aa18 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/suggest_interface.h"
@@ -33,6 +34,7 @@ class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
class SuggestOptions;
+class UnigramProperty;
class Dictionary {
public:
@@ -92,6 +94,8 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
+ // Returns the attributes stored for the unigram given as codePointCount code points. Updates
+ // the TimeKeeper's current time before delegating to the dictionary structure policy.
+ const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
}
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/unigram_property.cpp
new file mode 100644
index 000000000..16bbb69d8
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/unigram_property.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/unigram_property.h"
+
+namespace latinime {
+
+// Writes this unigram's attributes into the Java-side output containers.
+//
+// outCodePoints receives the first mCodePointCount code points, outFlags receives
+// {isNotAWord, isBlacklisted, hasBigrams, hasShortcuts}, outProbability receives the
+// probability in element 0 and outHistoricalInfo receives {timestamp, level, count}. For every
+// shortcut, one int[] is appended to outShortcutTargets and one Integer to
+// outShortcutProbabilities; both are java.util.ArrayList instances.
+void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
+        jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
+        jobject outShortcutTargets, jobject outShortcutProbabilities) const {
+    env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
+    // The element order of both arrays below must match the FORMAT_UNIGRAM_PROPERTY_*_INDEX
+    // constants in BinaryDictionary.java.
+    jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
+    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
+    env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
+    int historicalInfo[] = {mTimestamp, mLevel, mCount};
+    env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
+            historicalInfo);
+
+    jclass integerClass = env->FindClass("java/lang/Integer");
+    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
+    jclass arrayListClass = env->FindClass("java/util/ArrayList");
+    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+    const int shortcutTargetCount = mShortcutTargets.size();
+    for (int i = 0; i < shortcutTargetCount; ++i) {
+        const std::vector<int> &shortcutTarget = mShortcutTargets[i];
+        const jsize shortcutTargetLength = static_cast<jsize>(shortcutTarget.size());
+        jintArray shortcutTargetCodePointArray = env->NewIntArray(shortcutTargetLength);
+        if (!shortcutTargetCodePointArray) {
+            // Allocation failed and an exception is pending; stop making JNI calls that are
+            // not allowed in that state.
+            break;
+        }
+        // Taking the address of element 0 of an empty vector is undefined, so skip the copy
+        // for empty targets.
+        if (shortcutTargetLength > 0) {
+            env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
+                    shortcutTargetLength, &shortcutTarget[0]);
+        }
+        // ArrayList.add() returns boolean, so it has to be invoked through CallBooleanMethod;
+        // the Call<type>Method variant must match the method's return type.
+        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
+        env->DeleteLocalRef(shortcutTargetCodePointArray);
+        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
+                mShortcutProbabilities[i]);
+        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
+        env->DeleteLocalRef(integerProbability);
+    }
+    env->DeleteLocalRef(integerClass);
+    env->DeleteLocalRef(arrayListClass);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h
new file mode 100644
index 000000000..e93093bf4
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/unigram_property.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_PROPERTY_H
+#define LATINIME_UNIGRAM_PROPERTY_H
+
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+// Value object that carries the information belonging to a unigram from the dictionary
+// structure policy to the Java side. A default-constructed instance represents an invalid
+// unigram: its probability is NOT_A_PROBABILITY.
+class UnigramProperty {
+ public:
+    // Invalid unigram.
+    UnigramProperty()
+            : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
+              mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
+              mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(),
+              mShortcutProbabilities() {}
+
+    UnigramProperty(const UnigramProperty &unigramProperty)
+            : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
+              mIsNotAWord(unigramProperty.mIsNotAWord),
+              mIsBlacklisted(unigramProperty.mIsBlacklisted),
+              mHasBigrams(unigramProperty.mHasBigrams),
+              mHasShortcuts(unigramProperty.mHasShortcuts),
+              mProbability(unigramProperty.mProbability),
+              mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
+              mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
+              mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
+        // Source and destination are both MAX_WORD_LENGTH elements long, so the whole buffer
+        // can be copied.
+        memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
+    }
+
+    // codePoints must point to at least codePointCount elements. At most MAX_WORD_LENGTH code
+    // points are kept. shortcutTargets and shortcutProbabilities must not be null; both are
+    // copied.
+    UnigramProperty(const int *const codePoints, const int codePointCount,
+            const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
+            const bool hasShortcuts, const int probability, const int timestamp,
+            const int level, const int count,
+            const std::vector<std::vector<int> > *const shortcutTargets,
+            const std::vector<int> *const shortcutProbabilities)
+            : mCodePoints(), mCodePointCount(getStorableCodePointCount(codePoints, codePointCount)),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
+              mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
+              mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
+              mShortcutProbabilities(*shortcutProbabilities) {
+        // Copy only the code points the caller actually supplied. Copying sizeof(mCodePoints)
+        // bytes would read past the end of any source buffer shorter than MAX_WORD_LENGTH.
+        // The remaining elements stay zero from the mCodePoints() value-initialization.
+        if (mCodePointCount > 0) {
+            memcpy(mCodePoints, codePoints, sizeof(mCodePoints[0]) * mCodePointCount);
+        }
+    }
+
+    void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
+            jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
+            jobject outShortcutProbabilities) const;
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
+
+    // Returns how many of the supplied code points fit into mCodePoints: 0 for a null buffer or
+    // a non-positive count, otherwise the count limited to MAX_WORD_LENGTH.
+    static int getStorableCodePointCount(const int *const codePoints, const int codePointCount) {
+        if (!codePoints || codePointCount <= 0) {
+            return 0;
+        }
+        return codePointCount > MAX_WORD_LENGTH ? MAX_WORD_LENGTH : codePointCount;
+    }
+
+    int mCodePoints[MAX_WORD_LENGTH];
+    int mCodePointCount;
+    bool mIsNotAWord;
+    bool mIsBlacklisted;
+    bool mHasBigrams;
+    bool mHasShortcuts;
+    int mProbability;
+    // Historical information
+    int mTimestamp;
+    int mLevel;
+    int mCount;
+    // Shortcut targets and their probabilities; element i of one belongs to element i of the
+    // other.
+    std::vector<std::vector<int> > mShortcutTargets;
+    std::vector<int> mShortcutProbabilities;
+};
+} // namespace latinime
+#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 417d22e0d..c74a4ebbe 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,6 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
+#include "suggest/core/dictionary/unigram_property.h"
#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -90,6 +91,10 @@ class DictionaryStructureWithBufferPolicy {
virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0;
+    // Used for testing. Returns the attributes stored for the word given as codePointCount code
+    // points. The implementations in this change return a default-constructed (invalid)
+    // UnigramProperty when the word is not found or the operation is not supported.
+    virtual const UnigramProperty getUnigramProperty(const int *const codePoints,
+            const int codePointCount) const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index e78c82725..2adafd22b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -123,6 +123,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
+    // Version 2 dictionaries do not support getUnigramProperty, so every query yields the
+    // invalid (default-constructed) property.
+    const UnigramProperty getUnigramProperty(const int *const codePoints,
+            const int codePointCount) const {
+        const UnigramProperty invalidUnigramProperty;
+        return invalidUnigramProperty;
+    }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 979b8e76d..d1ba1877c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -16,8 +16,11 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include <vector>
+
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -290,4 +293,42 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
}
}
+// Collects the attributes stored for the given word: the PtNode flags and probability, the
+// historical information from the probability entry and, if present, every shortcut target
+// together with its probability. Returns an invalid (default-constructed) UnigramProperty
+// when the word has no terminal PtNode.
+const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {
+        // The message names this method so that the log can be traced back to it.
+        AKLOGE("getUnigramProperty is called for invalid word.");
+        return UnigramProperty();
+    }
+    const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+    const ProbabilityEntry probabilityEntry =
+            mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
+                    ptNodeParams.getTerminalId());
+    // Fetch shortcut information.
+    std::vector<std::vector<int> > shortcutTargets;
+    std::vector<int> shortcutProbabilities;
+    if (ptNodeParams.hasShortcutTargets()) {
+        int shortcutTarget[MAX_WORD_LENGTH];
+        const ShortcutDictContent *const shortcutDictContent =
+                mBuffers.get()->getShortcutDictContent();
+        bool hasNext = true;
+        int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+        // Each call reads one entry, moves shortcutPos forward and reports through hasNext
+        // whether another entry follows.
+        while (hasNext) {
+            int shortcutTargetLength = 0;
+            int shortcutProbability = NOT_A_PROBABILITY;
+            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
+                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
+            std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
+            shortcutTargets.push_back(target);
+            shortcutProbabilities.push_back(shortcutProbability);
+        }
+    }
+    return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
+            ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
+            probabilityEntry.getTimeStamp(), probabilityEntry.getLevel(),
+            probabilityEntry.getCount(), &shortcutTargets, &shortcutProbabilities);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 78f3a553d..db4e8d21c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -107,6 +107,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
+ // Returns the attributes stored for the given word, or an invalid (default-constructed)
+ // UnigramProperty when the word has no terminal PtNode in this dictionary.
+ const UnigramProperty getUnigramProperty(const int *const codePoints,
+ const int codePointCount) const;
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 9cccdedda..03a302b8f 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -24,6 +24,7 @@ import android.util.Pair;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.utils.UnigramProperty;
import java.io.File;
import java.io.IOException;
@@ -824,4 +825,52 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
}
}
+
+    // Runs the unigram property test against a format version 4 dictionary.
+    public void testGetUnigramProperties() {
+        final int formatVersion = 4;
+        testGetUnigramProperties(formatVersion);
+    }
+
+    // Adds randomly generated unigrams to an empty dictionary of the given format version and
+    // checks that getUnigramProperty() reports the attributes each one was added with. The
+    // random seed is part of every failure message so that a failing run can be reproduced.
+    private void testGetUnigramProperties(final int formatVersion) {
+        final long seed = System.currentTimeMillis();
+        final Random random = new Random(seed);
+        final int ITERATION_COUNT = 1000;
+        final int codePointSetSize = 20;
+        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        BinaryDictionary binaryDictionary = null;
+        try {
+            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+            final UnigramProperty invalidUnigramProperty =
+                    binaryDictionary.getUnigramProperty("dummyWord");
+            assertFalse("seed=" + seed, invalidUnigramProperty.isValid());
+
+            for (int i = 0; i < ITERATION_COUNT; i++) {
+                final String word = CodePointUtils.generateWord(random, codePointSet);
+                final int unigramProbability = random.nextInt(0xFF);
+                final boolean isNotAWord = random.nextBoolean();
+                final boolean isBlacklisted = random.nextBoolean();
+                // TODO: Add tests for shortcut.
+                // TODO: Add tests for historical info.
+                binaryDictionary.addUnigramWord(word, unigramProbability,
+                        null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
+                        isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+                final UnigramProperty unigramProperty =
+                        binaryDictionary.getUnigramProperty(word);
+                final String failureContext = "seed=" + seed + ", iteration=" + i
+                        + ", word=" + word;
+                assertEquals(failureContext, word, unigramProperty.mCodePoints);
+                assertTrue(failureContext, unigramProperty.isValid());
+                assertEquals(failureContext, isNotAWord, unigramProperty.mIsNotAWord);
+                assertEquals(failureContext, isBlacklisted, unigramProperty.mIsBlacklisted);
+                assertFalse(failureContext, unigramProperty.mHasBigrams);
+                assertFalse(failureContext, unigramProperty.mHasShortcuts);
+                assertEquals(failureContext, unigramProbability, unigramProperty.mProbability);
+                assertTrue(failureContext, unigramProperty.mShortcutTargets.isEmpty());
+            }
+        } finally {
+            // Release the native dictionary and remove the temporary file even when an
+            // assertion fails, so that a failing run does not leak them.
+            if (binaryDictionary != null) {
+                binaryDictionary.close();
+            }
+            dictFile.delete();
+        }
+    }
}