aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp4
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.h87
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.cpp (renamed from native/jni/src/suggest/core/dictionary/unigram_property.cpp)30
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.h101
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp55
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp6
13 files changed, 232 insertions, 170 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index e68c0a6d8..16b1a56b1 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -143,10 +143,10 @@ void Dictionary::getProperty(const char *const query, const int queryLength, cha
maxResultLength);
}
-const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
+const WordProperty Dictionary::getWordProperty(const int *const codePoints,
const int codePointCount) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty(
+ return mDictionaryStructureWithBufferPolicy.get()->getWordProperty( // Lookup is delegated to the structure policy.
codePoints, codePointCount);
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index b37b4aa18..4a468f3df 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -22,7 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
-#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/suggest_interface.h"
@@ -34,7 +34,7 @@ class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
class SuggestOptions;
-class UnigramProperty;
+class WordProperty;
class Dictionary {
public:
@@ -94,7 +94,7 @@ class Dictionary {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
- const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
+ const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h
deleted file mode 100644
index c4ebb86ab..000000000
--- a/native/jni/src/suggest/core/dictionary/unigram_property.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_UNIGRAM_PROPERTY_H
-#define LATINIME_UNIGRAM_PROPERTY_H
-
-#include <cstring>
-#include <vector>
-
-#include "defines.h"
-#include "jni.h"
-
-namespace latinime {
-
-// This class is used for returning information belonging to a unigram to java side.
-class UnigramProperty {
- public:
- // Invalid unigram.
- UnigramProperty()
- : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
- mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {}
-
- UnigramProperty(const UnigramProperty &unigramProperty)
- : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
- mIsNotAWord(unigramProperty.mIsNotAWord),
- mIsBlacklisted(unigramProperty.mIsBlacklisted),
- mHasBigrams(unigramProperty.mHasBigrams),
- mHasShortcuts(unigramProperty.mHasShortcuts),
- mProbability(unigramProperty.mProbability),
- mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
- mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
- mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
- memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
- }
-
- UnigramProperty(const int *const codePoints, const int codePointCount,
- const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
- const bool hasShortcuts, const int probability, const int timestamp,
- const int level, const int count,
- const std::vector<std::vector<int> > *const shortcutTargets,
- const std::vector<int> *const shortcutProbabilities)
- : mCodePoints(), mCodePointCount(codePointCount),
- mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
- mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
- mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
- mShortcutProbabilities(*shortcutProbabilities) {
- memcpy(mCodePoints, codePoints, sizeof(mCodePoints));
- }
-
- void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) const;
-
- private:
- DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
-
- int mCodePoints[MAX_WORD_LENGTH];
- int mCodePointCount;
- bool mIsNotAWord;
- bool mIsBlacklisted;
- bool mHasBigrams;
- bool mHasShortcuts;
- int mProbability;
- // Historical information
- int mTimestamp;
- int mLevel;
- int mCount;
- // Shortcut
- std::vector<std::vector<int> > mShortcutTargets;
- std::vector<int> mShortcutProbabilities;
-};
-} // namespace latinime
-#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp
index 16bbb69d8..d8c330bbd 100644
--- a/native/jni/src/suggest/core/dictionary/unigram_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/word_property.cpp
@@ -14,34 +14,38 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/core/dictionary/word_property.h"
namespace latinime {
-void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
- jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) const {
- env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
+void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+ jobject outBigramProbabilities, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) const {
+ env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
- env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
- int historicalInfo[] = {mTimestamp, mLevel, mCount};
- env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
- historicalInfo);
+ int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
+ env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+ probabilityInfo);
jclass integerClass = env->FindClass("java/lang/Integer");
jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
- const int shortcutTargetCount = mShortcutTargets.size();
+
+ // TODO: Output bigrams.
+ // Output shortcuts.
+ const int shortcutTargetCount = mShortcuts.size();
for (int i = 0; i < shortcutTargetCount; ++i) {
- jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size());
+ const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
+ jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
- mShortcutTargets[i].size(), &mShortcutTargets[i][0]);
+ targetCodePoints->size(), &targetCodePoints->at(0));
env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
env->DeleteLocalRef(shortcutTargetCodePointArray);
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
- mShortcutProbabilities[i]);
+ mShortcuts[i].getProbability());
env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability);
env->DeleteLocalRef(integerProbability);
}
diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h
new file mode 100644
index 000000000..cc06b1baa
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/word_property.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORD_PROPERTY_H
+#define LATINIME_WORD_PROPERTY_H
+
+#include <cstring>
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+// Holds information belonging to a word so that it can be returned to the Java side via outputProperties().
+class WordProperty {
+ public:
+ class BigramProperty { // One bigram entry: target code points, probability and historical info.
+ public:
+ BigramProperty(const std::vector<int> *const targetCodePoints,
+ const int probability, const int timestamp, const int level, const int count)
+ : mTargetCodePoints(*targetCodePoints), mProbability(probability), // *targetCodePoints is copied.
+ mTimestamp(timestamp), mLevel(level), mCount(count) {}
+
+ private: // No accessors are declared for these fields in this class.
+ std::vector<int> mTargetCodePoints;
+ int mProbability;
+ int mTimestamp;
+ int mLevel;
+ int mCount;
+ };
+
+ class ShortcutProperty { // One shortcut entry: target code points and its probability.
+ public:
+ ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
+ : mTargetCodePoints(*targetCodePoints), mProbability(probability) {} // *targetCodePoints is copied.
+
+ const std::vector<int> *getTargetCodePoints() const {
+ return &mTargetCodePoints; // Pointer to the stored vector, not a copy.
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ private:
+ std::vector<int> mTargetCodePoints;
+ int mProbability;
+ };
+
+ // Invalid word: empty code points, all flags false, NOT_A_PROBABILITY, zeroed historical info.
+ WordProperty()
+ : mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false),
+ mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
+ mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {}
+
+ WordProperty(const std::vector<int> *const codePoints,
+ const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
+ const bool hasShortcuts, const int probability, const int timestamp,
+ const int level, const int count, const std::vector<BigramProperty> *const bigrams,
+ const std::vector<ShortcutProperty> *const shortcuts)
+ : mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
+ mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability),
+ mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams),
+ mShortcuts(*shortcuts) {} // codePoints, bigrams and shortcuts are dereferenced and copied.
+
+ void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
+ jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) const; // Defined in word_property.cpp.
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
+
+ std::vector<int> mCodePoints;
+ bool mIsNotAWord;
+ bool mIsBlacklisted;
+ bool mHasBigrams;
+ bool mHasShortcuts;
+ int mProbability;
+ // Historical information
+ int mTimestamp;
+ int mLevel;
+ int mCount;
+ std::vector<BigramProperty> mBigrams; // Bigram entries of this word.
+ std::vector<ShortcutProperty> mShortcuts; // Shortcut entries of this word.
+};
+} // namespace latinime
+#endif // LATINIME_WORD_PROPERTY_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index c74a4ebbe..b878984f1 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,7 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
-#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/core/dictionary/word_property.h"
#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -92,7 +92,7 @@ class DictionaryStructureWithBufferPolicy {
const int maxResultLength) = 0;
// Used for testing.
- virtual const UnigramProperty getUnigramProperty(const int *const codePonts,
+ virtual const WordProperty getWordProperty(const int *const codePonts,
const int codePointCount) const = 0;
protected:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index be7a3c228..7504524f0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -24,7 +24,7 @@ const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
"REQUIRES_GERMAN_UMLAUT_PROCESSING";
// TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
-const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
+const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
@@ -73,51 +73,56 @@ bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
}
-bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
+bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount,
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { // Returns false as soon as any write fails.
int writingPos = 0;
- if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
+ HeaderReadWriteUtils::AttributeMap attributeMapToWrite(mAttributeMap); // Work on a copy of mAttributeMap.
+ fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
+ extendedRegionSize, &attributeMapToWrite); // Adds counts, region size and timestamps to the copy.
+ if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
&writingPos)) {
return false;
}
- if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags,
+ if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
&writingPos)) {
return false;
}
// Temporarily writes a dummy header size.
int headerSizeFieldPos = writingPos;
- if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */,
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
&writingPos)) {
return false;
}
- HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, UNIGRAM_COUNT_KEY, unigramCount);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, BIGRAM_COUNT_KEY, bigramCount);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, EXTENDED_REGION_SIZE_KEY,
- extendedRegionSize);
- if (updatesLastUpdatedTime) {
- // Set current time as a last updated time.
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
- TimeKeeper::peekCurrentTime());
- }
- if (updatesLastDecayedTime) {
- // Set current time as a last updated time.
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
- TimeKeeper::peekCurrentTime());
- }
- if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToWrite,
&writingPos)) {
return false;
}
- // Writes an actual header size.
- if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
+ // Writes the actual header size (the final writingPos) at the position of the dummy size field.
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
&headerSizeFieldPos)) {
return false;
}
return true;
}
+void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
+ const int bigramCount, const int extendedRegionSize,
+ HeaderReadWriteUtils::AttributeMap *outAttributeMap) const { // Sets counts, region size and timestamps in *outAttributeMap.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
+ extendedRegionSize);
+ // Set the current time as the generation time (written unconditionally).
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
+ TimeKeeper::peekCurrentTime());
+ if (updatesLastDecayedTime) {
+ // Set the current time as the last decayed time.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY,
+ TimeKeeper::peekCurrentTime());
+ }
+}
+
/* static */ HeaderReadWriteUtils::AttributeMap
HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
HeaderReadWriteUtils::AttributeMap attributeMap;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 1208d2c2a..a44f9f0fc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -39,8 +39,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
- mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
@@ -62,10 +62,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
- mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
@@ -75,7 +75,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
mAttributeMap(), mMultiWordCostMultiplier(0.0f),
mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
- mLastUpdatedTime(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
+ mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false) {}
~HeaderPolicy() {}
@@ -122,8 +122,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mRequiresGermanUmlautProcessing;
}
- AK_FORCE_INLINE int getLastUpdatedTime() const {
- return mLastUpdatedTime;
+ AK_FORCE_INLINE int getDate() const {
+ return mDate;
}
AK_FORCE_INLINE int getLastDecayedTime() const {
@@ -149,9 +149,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
- bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
+ bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount,
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const;
+
+ void fillInHeader(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount, const int extendedRegionSize,
+ HeaderReadWriteUtils::AttributeMap *outAttributeMap) const;
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
@@ -159,7 +163,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const IS_DECAYING_DICT_KEY;
- static const char *const LAST_UPDATED_TIME_KEY;
+ static const char *const DATE_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
@@ -175,7 +179,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const float mMultiWordCostMultiplier;
const bool mRequiresGermanUmlautProcessing;
const bool mIsDecayingDict;
- const int mLastUpdatedTime;
+ const int mDate;
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 2adafd22b..67d615e86 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -123,10 +123,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
- const UnigramProperty getUnigramProperty(const int *const codePoints,
+ const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const {
- // getUnigramProperty is not supported.
- return UnigramProperty();
+ // getWordProperty is not supported by this policy; a default-constructed WordProperty is returned.
+ return WordProperty();
}
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index b4730fe68..5022bafcb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -20,7 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/core/dictionary/word_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -317,22 +317,59 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
}
}
-const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
+const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
const int codePointCount) const {
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
false /* forceLowerCaseSearch */);
if (ptNodePos == NOT_A_DICT_POS) {
- AKLOGE("fetchUnigramProperty is called for invalid word.");
- return UnigramProperty();
+ AKLOGE("getWordProperty is called for invalid word.");
+ return WordProperty();
}
const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+ ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry =
mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+ // Fetch bigram information.
+ std::vector<WordProperty::BigramProperty> bigrams;
+ const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+ if (bigramListPos != NOT_A_DICT_POS) {
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ const BigramDictContent *const bigramDictContent = mBuffers.get()->getBigramDictContent();
+ const TerminalPositionLookupTable *const terminalPositionLookupTable =
+ mBuffers.get()->getTerminalPositionLookupTable();
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const BigramEntry bigramEntry =
+ bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ const int word1TerminalId = bigramEntry.getTargetTerminalId();
+ const int word1TerminalPtNodePos =
+ terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
+ if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
+ continue;
+ }
+ // Word (unigram) probability
+ int word1Probability = NOT_A_PROBABILITY;
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + codePointCount);
+ const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
+ const int probability = bigramEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
+ bigramEntry.getProbability();
+ bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
+ historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+ historicalInfo->getCount()));
+ }
+ }
// Fetch shortcut information.
- std::vector<std::vector<int> > shortcutTargets;
- std::vector<int> shortcutProbabilities;
+ std::vector<WordProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
if (shortcutPos != NOT_A_DICT_POS) {
int shortcutTarget[MAX_WORD_LENGTH];
@@ -345,15 +382,14 @@ const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *cons
shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
- shortcutTargets.push_back(target);
- shortcutProbabilities.push_back(shortcutProbability);
+ shortcuts.push_back(WordProperty::ShortcutProperty(&target, shortcutProbability));
}
}
- return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
- ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
+ return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
+ ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities);
+ historicalInfo->getCount(), &bigrams, &shortcuts);
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 81aed20a3..a43bd0eca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -106,7 +106,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
- const UnigramProperty getUnigramProperty(const int *const codePoints,
+ const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 43227635c..672097455 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -39,11 +39,11 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPat
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
- false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
- AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, "
+ if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
+ unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
+ AKLOGE("Cannot write header structure to buffer. "
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
- "extendedRegionSize: %d", false, false, unigramCount, bigramCount,
+ "extendedRegionSize: %d", false, unigramCount, bigramCount,
extendedRegionSize);
return;
}
@@ -62,9 +62,8 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
}
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- true /* updatesLastDecayedTime */, unigramCount, bigramCount,
- 0 /* extendedRegionSize */)) {
+ if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
+ unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
return;
}
dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 442373b29..84403c807 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -48,9 +48,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy);
- headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(),
- true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */,
- 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
+ headerPolicy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
+ 0 /* unigramCount */, 0 /* bigramCount */,
+ 0 /* extendedRegionSize */, dictBuffers.get()->getWritableHeaderBuffer());
if (!DynamicPtWritingUtils::writeEmptyDictionary(
dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");