aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp 37
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp 6
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h 5
-rw-r--r-- native/jni/src/suggest/core/dictionary/word_property.cpp 44
-rw-r--r-- native/jni/src/suggest/core/dictionary/word_property.h 74
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 50
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h 2
9 files changed, 196 insertions, 32 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 16a3fe825..c919ebd91 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -260,18 +260,39 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length);
}
+// Method to iterate all words in the dictionary for makedict.
+// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
+// the dictionary does not have a next word.
+// outCodePoints must hold exactly MAX_WORD_LENGTH elements; any other length is logged, trips
+// ASSERT(false) and makes the call return 0 without touching the array. 0 is also returned when
+// dict is a null handle. Otherwise the whole array is overwritten: the native buffer is
+// zero-filled before the lookup, so elements the dictionary does not write are 0.
+static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
+        jlong dict, jint token, jintArray outCodePoints) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) return 0;
+    const jsize outCodePointsLength = env->GetArrayLength(outCodePoints);
+    if (outCodePointsLength != MAX_WORD_LENGTH) {
+        AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength);
+        ASSERT(false);
+        return 0;
+    }
+    // The check above pins the length to MAX_WORD_LENGTH, so a fixed-size array is used here
+    // instead of a variable-length array, which is a compiler extension in C++.
+    int wordCodePoints[MAX_WORD_LENGTH];
+    memset(wordCodePoints, 0, sizeof(wordCodePoints));
+    const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints);
+    env->SetIntArrayRegion(outCodePoints, 0, outCodePointsLength, wordCodePoints);
+    return nextToken;
+}
+
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) {
+ jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
- wordProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
- outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+ wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
+ outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outShortcutProbabilities);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -521,10 +542,16 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getWordPropertyNative"),
- const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
+ const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
+ "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
+ const_cast<char *>("getNextWordNative"),
+ const_cast<char *>("(JI[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
+ },
+ {
const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 16b1a56b1..9b71eff7a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -150,6 +150,12 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
codePoints, codePointCount);
}
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+    // Call TimeKeeper::setCurrentTime() first, then forward the request unchanged to the
+    // dictionary structure policy and hand its token back to the caller.
+    TimeKeeper::setCurrentTime();
+    const int nextToken = mDictionaryStructureWithBufferPolicy.get()->getNextWordAndNextToken(
+            token, outCodePoints);
+    return nextToken;
+}
+
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 4a468f3df..0a413cb52 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -96,6 +96,11 @@ class Dictionary {
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints);
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
}
diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp
index ed32bde8b..288e6b05e 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/word_property.cpp
@@ -19,29 +19,53 @@
namespace latinime {
void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
- jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) const {
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+ jobject outBigramProbabilities, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) const {
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
- env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
- int historicalInfo[] = {mTimestamp, mLevel, mCount};
- env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
- historicalInfo);
+ int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
+ env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+ probabilityInfo);
jclass integerClass = env->FindClass("java/lang/Integer");
jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
- const int shortcutTargetCount = mShortcutTargets.size();
+
+ // Output bigrams.
+ const int bigramCount = mBigrams.size();
+ for (int i = 0; i < bigramCount; ++i) {
+ const BigramProperty *const bigramProperty = &mBigrams[i];
+ const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
+ jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
+ env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
+ word1CodePoints->size(), &word1CodePoints->at(0));
+ env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
+ env->DeleteLocalRef(bigramWord1CodePointArray);
+
+ int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
+ bigramProperty->getTimestamp(), bigramProperty->getLevel(),
+ bigramProperty->getCount()};
+ jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
+ env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
+ NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
+ env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
+ env->DeleteLocalRef(bigramProbabilityInfoArray);
+ }
+
+ // Output shortcuts.
+ const int shortcutTargetCount = mShortcuts.size();
for (int i = 0; i < shortcutTargetCount; ++i) {
- jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size());
+ const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
+ jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
- mShortcutTargets[i].size(), &mShortcutTargets[i][0]);
+ targetCodePoints->size(), &targetCodePoints->at(0));
env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
env->DeleteLocalRef(shortcutTargetCodePointArray);
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
- mShortcutProbabilities[i]);
+ mShortcuts[i].getProbability());
env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability);
env->DeleteLocalRef(integerProbability);
}
diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h
index dcac8536a..40b1a91a4 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/word_property.h
@@ -28,27 +28,78 @@ namespace latinime {
// This class is used for returning information belonging to a word to java side.
class WordProperty {
public:
- // TODO: Add bigram information.
+ // Value holder for one bigram of a word: a private copy of the target word's code points
+ // together with its probability, timestamp, level and count.
+ class BigramProperty {
+  public:
+     // Copies *targetCodePoints; the caller's vector is not referenced afterwards.
+     BigramProperty(const std::vector<int> *const targetCodePoints,
+             const int probability, const int timestamp, const int level, const int count)
+             : mTargetCodePoints(*targetCodePoints),
+               mProbability(probability),
+               mTimestamp(timestamp),
+               mLevel(level),
+               mCount(count) {}
+
+     // Pointer to the internally stored code points; valid as long as this object is alive.
+     const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }
+
+     int getProbability() const { return mProbability; }
+
+     int getTimestamp() const { return mTimestamp; }
+
+     int getLevel() const { return mLevel; }
+
+     int getCount() const { return mCount; }
+
+  private:
+     std::vector<int> mTargetCodePoints;
+     int mProbability;
+     int mTimestamp;
+     int mLevel;
+     int mCount;
+ };
+
+ // Value holder for one shortcut of a word: a private copy of the shortcut target's code
+ // points together with its probability.
+ class ShortcutProperty {
+  public:
+     // Copies *targetCodePoints; the caller's vector is not referenced afterwards.
+     ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
+             : mTargetCodePoints(*targetCodePoints),
+               mProbability(probability) {}
+
+     // Pointer to the internally stored code points; valid as long as this object is alive.
+     const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }
+
+     int getProbability() const { return mProbability; }
+
+  private:
+     std::vector<int> mTargetCodePoints;
+     int mProbability;
+ };
+
// Invalid word.
WordProperty()
: mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false),
mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {}
+ mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {}
WordProperty(const std::vector<int> *const codePoints,
const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
const bool hasShortcuts, const int probability, const int timestamp,
- const int level, const int count,
- const std::vector<std::vector<int> > *const shortcutTargets,
- const std::vector<int> *const shortcutProbabilities)
+ const int level, const int count, const std::vector<BigramProperty> *const bigrams,
+ const std::vector<ShortcutProperty> *const shortcuts)
: mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability),
- mTimestamp(timestamp), mLevel(level), mCount(count),
- mShortcutTargets(*shortcutTargets), mShortcutProbabilities(*shortcutProbabilities) {}
+ mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams),
+ mShortcuts(*shortcuts) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) const;
+ jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
@@ -63,9 +114,8 @@ class WordProperty {
int mTimestamp;
int mLevel;
int mCount;
- // Shortcut
- std::vector<std::vector<int> > mShortcutTargets;
- std::vector<int> mShortcutProbabilities;
+ std::vector<BigramProperty> mBigrams;
+ std::vector<ShortcutProperty> mShortcuts;
};
} // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index b878984f1..784419586 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -95,6 +95,11 @@ class DictionaryStructureWithBufferPolicy {
virtual const WordProperty getWordProperty(const int *const codePonts,
const int codePointCount) const = 0;
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 67d615e86..319c81569 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -129,6 +129,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return WordProperty();
}
+ int getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ // getNextWordAndNextToken is not supported. Both arguments are ignored and 0 is always returned.
+ return 0;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 0b067e127..1c420e070 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -332,9 +332,44 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+ // Fetch bigram information.
+ std::vector<WordProperty::BigramProperty> bigrams;
+ const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+ if (bigramListPos != NOT_A_DICT_POS) {
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ const BigramDictContent *const bigramDictContent = mBuffers.get()->getBigramDictContent();
+ const TerminalPositionLookupTable *const terminalPositionLookupTable =
+ mBuffers.get()->getTerminalPositionLookupTable();
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const BigramEntry bigramEntry =
+ bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ const int word1TerminalId = bigramEntry.getTargetTerminalId();
+ const int word1TerminalPtNodePos =
+ terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
+ if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
+ continue;
+ }
+ // Word (unigram) probability
+ int word1Probability = NOT_A_PROBABILITY;
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + codePointCount);
+ const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
+ const int probability = bigramEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
+ bigramEntry.getProbability();
+ bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
+ historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+ historicalInfo->getCount()));
+ }
+ }
// Fetch shortcut information.
- std::vector<std::vector<int> > shortcutTargets;
- std::vector<int> shortcutProbabilities;
+ std::vector<WordProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
if (shortcutPos != NOT_A_DICT_POS) {
int shortcutTarget[MAX_WORD_LENGTH];
@@ -347,15 +382,20 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
- shortcutTargets.push_back(target);
- shortcutProbabilities.push_back(shortcutProbability);
+ shortcuts.push_back(WordProperty::ShortcutProperty(&target, shortcutProbability));
}
}
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities);
+ historicalInfo->getCount(), &bigrams, &shortcuts);
+}
+
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token,
+ int *const outCodePoints) {
+ // TODO: Implement. For now this is a stub: both arguments are ignored and 0 is always returned.
+ return 0;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index a43bd0eca..1bcd4ceea 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -109,6 +109,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
+ int getNextWordAndNextToken(const int token, int *const outCodePoints);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);