about | summary | refs | log | tree | commit | diff | stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/jni/NativeFileList.mk 1
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp 143
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h 46
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 29
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 27
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 23
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h 4
-rw-r--r-- native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp 92
10 files changed, 336 insertions, 33 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 7a732a588..4a89b2b63 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -128,6 +128,7 @@ LATIN_IME_CORE_TEST_FILES := \
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
+ suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp \
suggest/policyimpl/dictionary/utils/trie_map_test.cpp \
utils/autocorrection_threshold_utils_test.cpp \
utils/int_array_view_test.cpp
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index bbcea2ee0..ea2d24e67 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -16,6 +16,11 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+#include <algorithm>
+#include <cstring>
+
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
namespace latinime {
bool LanguageModelDictContent::save(FILE *const file) const {
@@ -66,6 +71,19 @@ bool LanguageModelDictContent::removeNgramProbabilityEntry(const WordIdArrayView
return mTrieMap.remove(wordId, bitmapEntryIndex);
}
+bool LanguageModelDictContent::truncateEntries(const int *const entryCounts,
+ const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy) {
+ for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) {
+ if (entryCounts[i] <= maxEntryCounts[i]) {
+ continue;
+ }
+ if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
bool LanguageModelDictContent::runGCInner(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange,
@@ -118,4 +136,129 @@ int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWord
return bitmapEntryIndex;
}
+bool LanguageModelDictContent::updateAllProbabilityEntriesInner(const int bitmapEntryIndex,
+ const int level, const HeaderPolicy *const headerPolicy, int *const outEntryCounts) {
+ for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) {
+ if (level > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ AKLOGE("Invalid level. level: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.",
+ level, MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ return false;
+ }
+ const ProbabilityEntry probabilityEntry =
+ ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo);
+ if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ probabilityEntry.getHistoricalInfo(), headerPolicy);
+ if (ForgettingCurveUtils::needsToKeep(&historicalInfo, headerPolicy)) {
+ // Update the entry.
+ const ProbabilityEntry updatedEntry(probabilityEntry.getFlags(), &historicalInfo);
+ if (!mTrieMap.put(entry.key(), updatedEntry.encode(mHasHistoricalInfo),
+ bitmapEntryIndex)) {
+ return false;
+ }
+ } else {
+ // Remove the entry.
+ if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) {
+ return false;
+ }
+ continue;
+ }
+ }
+ if (!probabilityEntry.representsBeginningOfSentence()) {
+ outEntryCounts[level] += 1;
+ }
+ if (!entry.hasNextLevelMap()) {
+ continue;
+ }
+ if (!updateAllProbabilityEntriesInner(entry.getNextLevelBitmapEntryIndex(), level + 1,
+ headerPolicy, outEntryCounts)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
+ const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel) {
+ std::vector<int> prevWordIds;
+ std::vector<EntryInfoToTurncate> entryInfoVector;
+ if (!getEntryInfo(headerPolicy, targetLevel, mTrieMap.getRootBitmapEntryIndex(),
+ &prevWordIds, &entryInfoVector)) {
+ return false;
+ }
+ if (static_cast<int>(entryInfoVector.size()) <= maxEntryCount) {
+ return true;
+ }
+ const int entryCountToRemove = static_cast<int>(entryInfoVector.size()) - maxEntryCount;
+ std::partial_sort(entryInfoVector.begin(), entryInfoVector.begin() + entryCountToRemove,
+ entryInfoVector.end(),
+ EntryInfoToTurncate::Comparator());
+ for (int i = 0; i < entryCountToRemove; ++i) {
+ const EntryInfoToTurncate &entryInfo = entryInfoVector[i];
+ if (!removeNgramProbabilityEntry(
+ WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mEntryLevel), entryInfo.mKey)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPolicy,
+ const int targetLevel, const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
+ std::vector<EntryInfoToTurncate> *const outEntryInfo) const {
+ const int currentLevel = prevWordIds->size();
+ for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) {
+ if (currentLevel < targetLevel) {
+ if (!entry.hasNextLevelMap()) {
+ continue;
+ }
+ prevWordIds->push_back(entry.key());
+ if (!getEntryInfo(headerPolicy, targetLevel, entry.getNextLevelBitmapEntryIndex(),
+ prevWordIds, outEntryInfo)) {
+ return false;
+ }
+ prevWordIds->pop_back();
+ continue;
+ }
+ const ProbabilityEntry probabilityEntry =
+ ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo);
+ const int probability = (mHasHistoricalInfo) ?
+ ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
+ headerPolicy) : probabilityEntry.getProbability();
+ outEntryInfo->emplace_back(probability,
+ probabilityEntry.getHistoricalInfo()->getTimeStamp(),
+ entry.key(), targetLevel, prevWordIds->data());
+ }
+ return true;
+}
+
+bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()(
+ const EntryInfoToTurncate &left, const EntryInfoToTurncate &right) const {
+ if (left.mProbability != right.mProbability) {
+ return left.mProbability < right.mProbability;
+ }
+ if (left.mTimestamp != right.mTimestamp) {
+ return left.mTimestamp > right.mTimestamp;
+ }
+ if (left.mKey != right.mKey) {
+ return left.mKey < right.mKey;
+ }
+ if (left.mEntryLevel != right.mEntryLevel) {
+ return left.mEntryLevel > right.mEntryLevel;
+ }
+ for (int i = 0; i < left.mEntryLevel; ++i) {
+ if (left.mPrevWordIds[i] != right.mPrevWordIds[i]) {
+ return left.mPrevWordIds[i] < right.mPrevWordIds[i];
+ }
+ }
+ // left and right represent the same entry.
+ return false;
+}
+
+LanguageModelDictContent::EntryInfoToTurncate::EntryInfoToTurncate(const int probability,
+ const int timestamp, const int key, const int entryLevel, const int *const prevWordIds)
+ : mProbability(probability), mTimestamp(timestamp), mKey(key), mEntryLevel(entryLevel) {
+ memmove(mPrevWordIds, prevWordIds, mEntryLevel * sizeof(mPrevWordIds[0]));
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index bd07f2f62..43b2aab66 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -18,6 +18,7 @@
#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
#include <cstdio>
+#include <vector>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
@@ -29,6 +30,8 @@
namespace latinime {
+class HeaderPolicy;
+
/**
* Class representing language model.
*
@@ -73,9 +76,45 @@ class LanguageModelDictContent {
bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId);
+ bool updateAllProbabilityEntries(const HeaderPolicy *const headerPolicy,
+ int *const outEntryCounts) {
+ for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) {
+ outEntryCounts[i] = 0;
+ }
+ return updateAllProbabilityEntriesInner(mTrieMap.getRootBitmapEntryIndex(), 0 /* level */,
+ headerPolicy, outEntryCounts);
+ }
+
+ // entryCounts should be created by updateAllProbabilityEntries.
+ bool truncateEntries(const int *const entryCounts, const int *const maxEntryCounts,
+ const HeaderPolicy *const headerPolicy);
+
private:
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
+ class EntryInfoToTurncate {
+ public:
+ class Comparator {
+ public:
+ bool operator()(const EntryInfoToTurncate &left,
+ const EntryInfoToTurncate &right) const;
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(Comparator);
+ };
+
+ EntryInfoToTurncate(const int probability, const int timestamp, const int key,
+ const int entryLevel, const int *const prevWordIds);
+
+ int mProbability;
+ int mTimestamp;
+ int mKey;
+ int mEntryLevel;
+ int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(EntryInfoToTurncate);
+ };
+
TrieMap mTrieMap;
const bool mHasHistoricalInfo;
@@ -84,6 +123,13 @@ class LanguageModelDictContent {
int *const outNgramCount);
int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds);
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
+ bool updateAllProbabilityEntriesInner(const int bitmapEntryIndex, const int level,
+ const HeaderPolicy *const headerPolicy, int *const outEntryCounts);
+ bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy,
+ const int maxEntryCount, const int targetLevel);
+ bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel,
+ const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
+ std::vector<EntryInfoToTurncate> *const outEntryInfo) const;
};
} // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index fb6840ba6..b7c31bf75 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -161,29 +161,15 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
- if (originalProbabilityEntry.hasHistoricalInfo()) {
- const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
- originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
- const ProbabilityEntry probabilityEntry(originalProbabilityEntry.getFlags(),
- &historicalInfo);
- if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
- AKLOGE("Cannot write updated probability entry. terminalId: %d",
- toBeUpdatedPtNodeParams->getTerminalId());
- return false;
- }
- const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
- if (!isValid) {
- if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
- AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
- return false;
- }
- }
- *outNeedsToKeepPtNode = isValid;
- } else {
- // No need to update probability.
+ if (originalProbabilityEntry.isValid()) {
*outNeedsToKeepPtNode = true;
+ return true;
+ }
+ if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+ AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+ return false;
}
+ *outNeedsToKeepPtNode = false;
return true;
}
@@ -380,6 +366,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
+// TODO: Move probability handling code to LanguageModelDictContent.
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
const ProbabilityEntry *const probabilityEntry) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 04e3018da..2ea248e86 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -127,21 +127,28 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+ const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
- BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == ptNodePos
- && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
- return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
- }
+ // TODO: Support n-gram.
+ const PtNodeParams prevWordPtNodeParams =
+ mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordsPtNodePos[0]);
+ const int prevWordTerminalId = prevWordPtNodeParams.getTerminalId();
+ const ProbabilityEntry probabilityEntry =
+ mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(
+ IntArrayView::fromObject(&prevWordTerminalId),
+ ptNodeParams.getTerminalId());
+ if (!probabilityEntry.isValid()) {
+ return NOT_A_PROBABILITY;
+ }
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ return ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
+ mHeaderPolicy);
+ } else {
+ return probabilityEntry.getProbability();
}
- return NOT_A_PROBABILITY;
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 4220312e0..d53575aa7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -85,6 +85,27 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
&shortcutPolicy);
+ int entryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
+ if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntries(headerPolicy,
+ entryCountTable)) {
+ AKLOGE("Failed to update probabilities in language model dict content.");
+ return false;
+ }
+ if (headerPolicy->isDecayingDict()) {
+ int maxEntryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
+ maxEntryCountTable[0] = headerPolicy->getMaxUnigramCount();
+ maxEntryCountTable[1] = headerPolicy->getMaxBigramCount();
+ for (size_t i = 2; i < NELEMS(maxEntryCountTable); ++i) {
+ // TODO: Have max n-gram count.
+ maxEntryCountTable[i] = headerPolicy->getMaxBigramCount();
+ }
+ if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(entryCountTable,
+ maxEntryCountTable, headerPolicy)) {
+ AKLOGE("Failed to truncate entries in language model dict content.");
+ return false;
+ }
+ }
+
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners
@@ -187,6 +208,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return true;
}
+// TODO: Remove.
bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
@@ -227,6 +249,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
return true;
}
+// TODO: Remove.
bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
const TerminalPositionLookupTable *const terminalPosLookupTable =
mBuffers->getTerminalPositionLookupTable();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 833063c17..ecbe7922c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -31,7 +31,7 @@ uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) con
uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
int *const pos) const {
- const int value = readUint(size, *pos);
+ const uint32_t value = readUint(size, *pos);
*pos += size;
return value;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index c0a9fcb1d..4b3c98988 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,7 +114,7 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
- static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+ static AK_FORCE_INLINE uint32_t readUint(const uint8_t *const buffer,
const int size, const int pos) {
// size must be in 1 to 4.
ASSERT(size >= 1 && size <= 4);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index 6d91790b2..c2aeac211 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -84,6 +84,10 @@ class TrieMap {
return mValue;
}
+ AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const {
+ return mNextLevelBitmapEntryIndex;
+ }
+
private:
const TrieMap *const mTrieMap;
const int mKey;
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp
new file mode 100644
index 000000000..a1c310d8a
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+namespace latinime {
+namespace {
+
+TEST(ByteArrayUtilsTest, TestReadInt) {
+ const uint8_t buffer[] = { 0x1u, 0x8Au, 0x0u, 0xAAu };
+
+ EXPECT_EQ(0x01u, ByteArrayUtils::readUint8(buffer, 0));
+ EXPECT_EQ(0x8Au, ByteArrayUtils::readUint8(buffer, 1));
+ EXPECT_EQ(0x0u, ByteArrayUtils::readUint8(buffer, 2));
+ EXPECT_EQ(0xAAu, ByteArrayUtils::readUint8(buffer, 3));
+
+ EXPECT_EQ(0x018Au, ByteArrayUtils::readUint16(buffer, 0));
+ EXPECT_EQ(0x8A00u, ByteArrayUtils::readUint16(buffer, 1));
+ EXPECT_EQ(0xAAu, ByteArrayUtils::readUint16(buffer, 2));
+
+ EXPECT_EQ(0x18A00AAu, ByteArrayUtils::readUint32(buffer, 0));
+
+ int pos = 0;
+ EXPECT_EQ(0x18A00, ByteArrayUtils::readSint24AndAdvancePosition(buffer, &pos));
+ pos = 1;
+ EXPECT_EQ(-0xA00AA, ByteArrayUtils::readSint24AndAdvancePosition(buffer, &pos));
+}
+
+TEST(ByteArrayUtilsTest, TestWriteAndReadInt) {
+ uint8_t buffer[4];
+
+ int pos = 0;
+ const uint8_t data_1B = 0xC8;
+ ByteArrayUtils::writeUintAndAdvancePosition(buffer, data_1B, 1, &pos);
+ EXPECT_EQ(data_1B, ByteArrayUtils::readUint(buffer, 1, 0));
+
+ pos = 0;
+ const uint32_t data_4B = 0xABCD1234;
+ ByteArrayUtils::writeUintAndAdvancePosition(buffer, data_4B, 4, &pos);
+ EXPECT_EQ(data_4B, ByteArrayUtils::readUint(buffer, 4, 0));
+}
+
+TEST(ByteArrayUtilsTest, TestReadCodePoint) {
+ const uint8_t buffer[] = { 0x10, 0xFF, 0x00u, 0x20u, 0x41u, 0x1Fu, 0x60 };
+
+ EXPECT_EQ(0x10FF00, ByteArrayUtils::readCodePoint(buffer, 0));
+ EXPECT_EQ(0x20, ByteArrayUtils::readCodePoint(buffer, 3));
+ EXPECT_EQ(0x41, ByteArrayUtils::readCodePoint(buffer, 4));
+ EXPECT_EQ(NOT_A_CODE_POINT, ByteArrayUtils::readCodePoint(buffer, 5));
+
+ int pos = 0;
+ int codePointArray[3];
+ EXPECT_EQ(3, ByteArrayUtils::readStringAndAdvancePosition(buffer, MAX_WORD_LENGTH,
+ codePointArray, &pos));
+ EXPECT_EQ(0x10FF00, codePointArray[0]);
+ EXPECT_EQ(0x20, codePointArray[1]);
+ EXPECT_EQ(0x41, codePointArray[2]);
+ EXPECT_EQ(0x60, ByteArrayUtils::readCodePoint(buffer, pos));
+}
+
+TEST(ByteArrayUtilsTest, TestWriteAndReadCodePoint) {
+ uint8_t buffer[10];
+
+ const int codePointArray[] = { 0x10FF00, 0x20, 0x41 };
+ int pos = 0;
+ ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePointArray, 3,
+ true /* writesTerminator */, &pos);
+ EXPECT_EQ(0x10FF00, ByteArrayUtils::readCodePoint(buffer, 0));
+ EXPECT_EQ(0x20, ByteArrayUtils::readCodePoint(buffer, 3));
+ EXPECT_EQ(0x41, ByteArrayUtils::readCodePoint(buffer, 4));
+ EXPECT_EQ(NOT_A_CODE_POINT, ByteArrayUtils::readCodePoint(buffer, 5));
+}
+
+} // namespace
+} // namespace latinime