aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni')
-rw-r--r--native/jni/Android.mk2
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp28
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp12
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp5
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp46
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp131
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h)25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h6
18 files changed, 325 insertions, 145 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index d83bdadfa..ee93b4248 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -70,7 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \
bigram/bigram_list_read_write_utils.cpp \
bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
- header/header_reading_utils.cpp \
+ header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_node_reader.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index a63fab6dc..7f47493b2 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -188,8 +188,8 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
return dictionary->getProbability(codePoints, wordLength);
}
-static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1) {
+static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
+ jlong dict, jintArray word0, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
@@ -198,7 +198,8 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass claz
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
+ return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
+ word1Length);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -269,6 +270,16 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length);
}
+static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
+ jlong dict, jint unigramProbability, jint bigramProbability) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return NOT_A_PROBABILITY;
+ }
+ return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
+ bigramProbability);
+}
+
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
@@ -306,9 +317,9 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
- const_cast<char *>("isValidBigramNative"),
- const_cast<char *>("(J[I[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
+ const_cast<char *>("getBigramProbabilityNative"),
+ const_cast<char *>("(J[I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
const_cast<char *>("calcNormalizedScoreNative"),
@@ -334,6 +345,11 @@ static const JNINativeMethod sMethods[] = {
const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[I[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
+ },
+ {
+ const_cast<char *>("calculateProbabilityNative"),
+ const_cast<char *>("(JII)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
}
};
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 425b07624..5ba71c168 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -150,24 +150,26 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
}
-bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
+int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) return false;
+ if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
- if (NOT_A_DICT_POS == nextWordPos) return false;
+ if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
BinaryDictionaryBigramsIterator bigramsIt(
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) {
- return true;
+ return mDictionaryStructurePolicy->getProbability(
+ mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
+ bigramsIt.getProbability());
}
}
- return false;
+ return NOT_A_PROBABILITY;
}
// TODO: Move functions related to bigram to here
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 99b964c49..8af7ee75d 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -29,7 +29,7 @@ class BigramDictionary {
int getPredictions(const int *word, int length, int *outBigramCodePoints,
int *outBigramProbability, int *outputTypes) const;
- bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
+ int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
private:
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 033572201..ec1b63a12 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -93,8 +93,9 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
-bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
- return mBigramDictionary->isValidBigram(word0, length0, word1, length1);
+int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
+ int length1) const {
+ return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 06e84bbfe..974447468 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -67,7 +67,7 @@ class Dictionary {
int getProbability(const int *word, int length) const;
- bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const;
+ int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 3cfbfd85b..f91dd0e56 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -248,7 +248,9 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return;
}
- // TODO: Implement.
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFile(filePath, &mHeaderPolicy);
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 2cbb0ff3b..ebe1f3212 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -33,7 +33,7 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 311d31e5d..31178fb5c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -16,17 +16,23 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include <cstdio>
+#include <cstring>
+
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
+ ".tmp";
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@@ -131,6 +137,46 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
}
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
+ return;
+ }
+ const int tmpFileNameBufSize = strlen(fileName)
+ + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
+ char tmpFileName[tmpFileNameBufSize];
+ snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ FILE *const file = fopen(tmpFileName, "wb");
+ if (!file) {
+ return;
+ }
+ // Write header.
+ if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */),
+ headerBuffer.getTailPosition(), 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ // Write data in original buffer.
+ if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
+ mBuffer->getOriginalBufferSize(), 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ // Write data in additional buffer.
+ if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
+ mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ fclose(file);
+ rename(tmpFileName, fileName);
+}
+
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
const int bigramLinkedNodePos) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 20e35abcf..219ea9857 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#include <stdint.h>
+
#include "defines.h"
namespace latinime {
@@ -26,6 +28,7 @@ class DynamicBigramListPolicy;
class DynamicPatriciaTrieNodeReader;
class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy;
+class HeaderPolicy;
class DynamicPatriciaTrieWritingHelper {
public:
@@ -46,10 +49,15 @@ class DynamicPatriciaTrieWritingHelper {
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);
+
+ void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
static const int CHILDREN_POSITION_FIELD_SIZE;
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 196da5c97..47ace23a1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include <cstddef>
+#include <cstdio>
+#include <ctime>
namespace latinime {
@@ -36,7 +38,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
}
std::vector<int> keyCodePointVector;
insertCharactersIntoVector(key, &keyCodePointVector);
- HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
+ HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
if (it == mAttributeMap.end()) {
// The key was not found.
outValue[0] = '?';
@@ -85,7 +87,7 @@ int HeaderPolicy::readLastUpdatedTime() const {
bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const {
std::vector<int> keyVector;
insertCharactersIntoVector(key, &keyVector);
- HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector);
+ HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector);
if (it == mAttributeMap.end()) {
// The key was not found.
return false;
@@ -94,10 +96,56 @@ bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outV
return true;
}
-/* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes(
- const uint8_t *const dictBuf) {
- HeaderReadingUtils::AttributeMap attributeMap;
- HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
+bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const {
+ int writingPos = 0;
+ if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
+ &writingPos)) {
+ return false;
+ }
+ if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Temporarily writes a dummy header size.
+ int headerSizeFieldPos = writingPos;
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */,
+ &writingPos)) {
+ return false;
+ }
+ if (updatesLastUpdatedTime) {
+ // Set current time as a last updated time.
+ HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
+ std::vector<int> updatedTimekey;
+ insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
+ const time_t currentTime = time(NULL);
+ std::vector<int> updatedTimeValue;
+ char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
+ snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime);
+ insertCharactersIntoVector(charBuf, &updatedTimeValue);
+ attributeMapTowrite[updatedTimekey] = updatedTimeValue;
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ &writingPos)) {
+ return false;
+ }
+ } else {
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap,
+ &writingPos)) {
+ return false;
+ }
+ }
+ // Writes an actual header size.
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
+ &headerSizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+/* static */ HeaderReadWriteUtils::AttributeMap
+ HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
+ HeaderReadWriteUtils::AttributeMap attributeMap;
+ HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
return attributeMap;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 930b475c7..6b396f3f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -22,15 +22,18 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
- explicit HeaderPolicy(const uint8_t *const dictBuf)
- : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)),
- mSize(HeaderReadingUtils::getHeaderSize(dictBuf)),
+ explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
+ : mDictBuf(dictBuf),
+ mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
+ mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(readUsesForgettingCurveFlag()),
@@ -43,16 +46,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
+ return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return HeaderReadingUtils::requiresFrenchLigatureProcessing(
- mDictionaryFlags);
+ return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -70,6 +72,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
+ bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const;
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
@@ -80,9 +85,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
const uint8_t *const mDictBuf;
- const HeaderReadingUtils::DictionaryFlags mDictionaryFlags;
+ const FormatUtils::FORMAT_VERSION mDictFormatVersion;
+ const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
- HeaderReadingUtils::AttributeMap mAttributeMap;
+ HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
const bool mUsesForgettingCurve;
const int mLastUpdatedTime;
@@ -95,7 +101,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
bool getAttributeValueAsInt(const char *const key, int *const outValue) const;
- static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes(
+ static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
static int parseIntAttributeValue(const std::vector<int> *const attributeValue);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
new file mode 100644
index 000000000..80fe88671
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
+
+const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
+const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
+
+const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
+ // See the format of the header in the comment in
+ // BinaryDictionaryFormatUtils::detectFormatVersion()
+ return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
+ + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
+ return ByteArrayUtils::readUint16(dictBuf,
+ HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
+}
+
+/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
+ AttributeMap *const headerAttributes) {
+ const int headerSize = getHeaderSize(dictBuf);
+ int pos = getHeaderOptionsPosition();
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return;
+ }
+ int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
+ int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
+ while (pos < headerSize) {
+ const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
+ std::vector<int> key;
+ key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
+ const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
+ std::vector<int> value;
+ value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
+ headerAttributes->insert(AttributeMap::value_type(key, value));
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
+ BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
+ int *const writingPos) {
+ if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
+ writingPos)) {
+ return false;
+ }
+ switch (version) {
+ case FormatUtils::VERSION_2:
+ // Version 2 dictionary writing is not supported.
+ return false;
+ case FormatUtils::VERSION_3:
+ return buffer->writeUintAndAdvancePosition(3 /* data */,
+ HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+ default:
+ return false;
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
+ BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
+ int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
+ BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
+ BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
+ int *const writingPos) {
+ for (AttributeMap::const_iterator it = headerAttributes->begin();
+ it != headerAttributes->end(); ++it) {
+ // Write a key.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ // Write a value.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 5716198fb..6cce73375 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -14,18 +14,21 @@
* limitations under the License.
*/
-#ifndef LATINIME_HEADER_READING_UTILS_H
-#define LATINIME_HEADER_READING_UTILS_H
+#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H
+#define LATINIME_HEADER_READ_WRITE_UTILS_H
#include <map>
#include <stdint.h>
#include <vector>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
-class HeaderReadingUtils {
+class BufferWithExtendableBuffer;
+
+class HeaderReadWriteUtils {
public:
typedef uint16_t DictionaryFlags;
typedef std::map<std::vector<int>, std::vector<int> > AttributeMap;
@@ -54,8 +57,20 @@ class HeaderReadingUtils {
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
AttributeMap *const headerAttributes);
+ static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
+ const FormatUtils::FORMAT_VERSION version, int *const writingPos);
+
+ static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer,
+ const DictionaryFlags flags, int *const writingPos);
+
+ static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer,
+ const int size, int *const writingPos);
+
+ static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
+ const AttributeMap *const headerAttributes, int *const writingPos);
+
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
static const int MAX_ATTRIBUTE_KEY_LENGTH;
static const int MAX_ATTRIBUTE_VALUE_LENGTH;
@@ -75,4 +90,4 @@ class HeaderReadingUtils {
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
};
}
-#endif /* LATINIME_HEADER_READING_UTILS_H */
+#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
deleted file mode 100644
index 186c043c1..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
-
-#include <vector>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
-
-namespace latinime {
-
-const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
-const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
-
-const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
-const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
-const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2;
-const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4;
-
-const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) {
- // See the format of the header in the comment in
- // BinaryDictionaryFormatUtils::detectFormatVersion()
- return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
- + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
-}
-
-/* static */ HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) {
- return ByteArrayUtils::readUint16(dictBuf,
- HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
-}
-
-/* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
- AttributeMap *const headerAttributes) {
- const int headerSize = getHeaderSize(dictBuf);
- int pos = getHeaderOptionsPosition();
- if (pos == NOT_A_DICT_POS) {
- // The header doesn't have header options.
- return;
- }
- int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
- int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
- while (pos < headerSize) {
- const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
- std::vector<int> key;
- key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
- const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
- std::vector<int> value;
- value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
- headerAttributes->insert(AttributeMap::value_type(key, value));
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index cee3e4ab2..697d0159c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -34,7 +34,7 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 3796c7b7b..1d77d5c27 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -20,20 +20,10 @@
namespace latinime {
-/**
- * Dictionary size
- */
-// Any file smaller than this is not a dictionary.
-const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
+const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
-/**
- * Format versions
- */
-// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported
-// or obsolete dictionary formats.
-const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
-// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
+// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
@@ -45,16 +35,10 @@ const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
}
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
- case HEADER_VERSION_2_MAGIC_NUMBER:
- // Version 2 header are at least 12 bytes long.
- // If this header has the version 2 magic number but is less than 12 bytes long,
- // then it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
- return UNKNOWN_VERSION;
- }
+ case MAGIC_NUMBER:
// Version 2 header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes)
+ // Dictionary format version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index f84321577..79ed0de29 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -34,14 +34,16 @@ class FormatUtils {
UNKNOWN_VERSION
};
+ // 32 bit magic number is stored at the beginning of the dictionary header to reject
+ // unsupported or obsolete dictionary formats.
+ static const uint32_t MAGIC_NUMBER;
+
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
- static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
- static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_FORMAT_UTILS_H */