aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/NativeFileList.mk2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h97
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp2
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp60
8 files changed, 209 insertions, 7 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 7299ed3c0..ca40ba8a5 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \
ver4_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
language_model_dict_content.cpp \
+ language_model_dict_content_global_counters.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \
@@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \
suggest/core/layout/normal_distribution_2d_test.cpp \
suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
+ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \
suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 509bd683b..05a3a6356 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -24,9 +24,11 @@
namespace latinime {
const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
+const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0;
+const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1;
bool LanguageModelDictContent::save(FILE *const file) const {
- return mTrieMap.save(file);
+ return mTrieMap.save(file) && mGlobalCounters.save(file);
}
bool LanguageModelDictContent::runGC(
@@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
return false;
}
+ mGlobalCounters.incrementTotalCount();
+ mGlobalCounters.updateMaxValueOfCounters(
+ updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount());
for (size_t i = 0; i < prevWordIds.size(); ++i) {
if (prevWordIds[i] == NOT_A_WORD_ID) {
break;
@@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
return false;
}
+ // Record the count of the n-gram entry that was just stored; the unigram count was
+ // already recorded before this loop.
+ mGlobalCounters.updateMaxValueOfCounters(
+ updatedNgramProbabilityEntry.getHistoricalInfo()->getCount());
if (!originalNgramProbabilityEntry.isValid()) {
entryCountersToUpdate->incrementNgramCount(i + 2);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 1cccf92d2..5b92b96e3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dictionary/word_attributes.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -131,15 +132,17 @@ class LanguageModelDictContent {
const ProbabilityEntry mProbabilityEntry;
};
- LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
+ LanguageModelDictContent(const ReadWriteByteArrayView *const buffers,
const bool hasHistoricalInfo)
- : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
+ : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]),
+ mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
explicit LanguageModelDictContent(const bool hasHistoricalInfo)
- : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
+ : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {}
bool isNearSizeLimit() const {
- return mTrieMap.isNearSizeLimit();
+ return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters();
}
bool save(FILE *const file) const;
@@ -218,8 +221,11 @@ class LanguageModelDictContent {
// TODO: Remove
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
+ static const int TRIE_MAP_BUFFER_INDEX;
+ static const int GLOBAL_COUNTERS_BUFFER_INDEX;
TrieMap mTrieMap;
+ LanguageModelDictContentGlobalCounters mGlobalCounters;
const bool mHasHistoricalInfo;
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp
new file mode 100644
index 000000000..9518ab419
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD =
+ (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64; // 64 below 2^(bit width of WORD_COUNT_FIELD_SIZE bytes); compared with the max counter value
+const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30; // total count at or above this makes needsToHalveCounters() return true
+const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4; // byte width used when reading and writing each counter
+const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0; // slot of the total count; byte offset is index * COUNTER_SIZE_IN_BYTES
+const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1; // slot of the max counter value; byte offset is index * COUNTER_SIZE_IN_BYTES
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h
new file mode 100644
index 000000000..9953aa425
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
+#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/byte_array_view.h"
+
+namespace latinime {
+
+// Holds two global counters for the language model content: a running total count and the
+// largest counter value reported so far. Both can be loaded from a buffer and appended to a file.
+class LanguageModelDictContentGlobalCounters {
+ public:
+    // Wraps the given buffer and loads both counters from it via readValue().
+    explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer)
+            : mBuffer(buffer, 0 /* maxAdditionalBufferSize */),
+              mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)),
+              mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {}
+
+    // Creates counters that both start at zero.
+    LanguageModelDictContentGlobalCounters()
+            : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {}
+
+    // Returns true once either counter has reached its near-limit threshold.
+    bool needsToHalveCounters() const {
+        if (mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD) {
+            return true;
+        }
+        return mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
+    }
+
+    // Returns the current total count.
+    int getTotalCount() const {
+        return mTotalCount;
+    }
+
+    // Serializes both counters into a temporary buffer, each at its slot offset, and appends
+    // that buffer to the tail of |file|. Returns false as soon as any step fails.
+    bool save(FILE *const file) const {
+        BufferWithExtendableBuffer bufferToWrite(
+                BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+        const bool countersWritten =
+                bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES,
+                        TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)
+                && bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES,
+                        MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES);
+        return countersWritten
+                && DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite);
+    }
+
+    // Adds one to the total count.
+    void incrementTotalCount() {
+        ++mTotalCount;
+    }
+
+    // Raises the tracked maximum to |count| when |count| is larger; otherwise leaves it as is.
+    void updateMaxValueOfCounters(const int count) {
+        if (mMaxValueOfCounters < count) {
+            mMaxValueOfCounters = count;
+        }
+    }
+
+    // Divides both counters by two using integer division.
+    void halveCounters() {
+        mTotalCount = mTotalCount / 2;
+        mMaxValueOfCounters = mMaxValueOfCounters / 2;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters);
+
+    const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD;
+    const static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
+    const static int COUNTER_SIZE_IN_BYTES;
+    const static int TOTAL_COUNT_INDEX;
+    const static int MAX_VALUE_OF_COUNTERS_INDEX;
+
+    // Reads the counter stored in slot |index| of |buffer|. Yields 0 when the slot would extend
+    // past the buffer's tail position.
+    static int readValue(const BufferWithExtendableBuffer &buffer, const int index) {
+        const int startPos = COUNTER_SIZE_IN_BYTES * index;
+        const int endPos = startPos + COUNTER_SIZE_IN_BYTES;
+        if (endPos <= buffer.getTailPosition()) {
+            return buffer.readUint(COUNTER_SIZE_IN_BYTES, startPos);
+        }
+        return 0;
+    }
+
+    // Declaration order matters: mBuffer must be initialized before the counters read from it.
+    BufferWithExtendableBuffer mBuffer;
+    int mTotalCount;
+    int mMaxValueOfCounters;
+};
+} // namespace latinime
+#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 45f88e9b2..4d088dcab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -179,7 +179,7 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
+ mLanguageModelDictContent(&contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 8e6cb974b..eb6080a24 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -67,6 +67,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
-const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2;
} // namespace latinime
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp
new file mode 100644
index 000000000..44b5a8aaa
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
+
+#include <gtest/gtest.h>
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+namespace {
+
+// Checks that needsToHalveCounters() only turns true once a near-limit counter value has been
+// reported, and turns false again after halving.
+TEST(LanguageModelDictContentGlobalCountersTest, TestUpdateMaxValueOfCounters) {
+    // Largest value representable in WORD_COUNT_FIELD_SIZE bytes.
+    const int fieldBits = Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT;
+    const int largestCount = (1 << fieldBits) - 1;
+    LanguageModelDictContentGlobalCounters counters;
+
+    EXPECT_FALSE(counters.needsToHalveCounters());
+
+    counters.updateMaxValueOfCounters(10);
+    EXPECT_FALSE(counters.needsToHalveCounters());
+
+    counters.updateMaxValueOfCounters(largestCount);
+    EXPECT_TRUE(counters.needsToHalveCounters());
+
+    counters.halveCounters();
+    EXPECT_FALSE(counters.needsToHalveCounters());
+}
+
+// Checks that the total count goes up by one per increment and is halved with integer division.
+TEST(LanguageModelDictContentGlobalCountersTest, TestIncrementTotalCount) {
+    LanguageModelDictContentGlobalCounters counters;
+
+    EXPECT_EQ(0, counters.getTotalCount());
+    counters.incrementTotalCount();
+    EXPECT_EQ(1, counters.getTotalCount());
+
+    // 49 further increments bring the total from 1 to 50.
+    for (int expectedTotal = 2; expectedTotal <= 50; ++expectedTotal) {
+        counters.incrementTotalCount();
+    }
+    EXPECT_EQ(50, counters.getTotalCount());
+
+    counters.halveCounters();
+    EXPECT_EQ(25, counters.getTotalCount());
+    counters.halveCounters();
+    EXPECT_EQ(12, counters.getTotalCount());
+
+    // Four more halvings: 12 -> 6 -> 3 -> 1 -> 0.
+    for (int remaining = 4; remaining > 0; --remaining) {
+        counters.halveCounters();
+    }
+    EXPECT_EQ(0, counters.getTotalCount());
+}
+
+} // namespace
+} // namespace latinime