aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/NativeFileList.mk5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp282
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp219
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h128
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h78
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp91
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp136
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h43
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h2
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp20
-rw-r--r--native/jni/tests/utils/time_keeper_test.cpp38
21 files changed, 219 insertions, 1108 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 5e662543e..018a34d18 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -61,7 +61,6 @@ LATIN_IME_CORE_SRC_FILES := \
ver2_patricia_trie_node_reader.cpp \
ver2_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
- bigram/ver4_bigram_list_policy.cpp \
ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \
@@ -71,7 +70,6 @@ LATIN_IME_CORE_SRC_FILES := \
ver4_patricia_trie_writing_helper.cpp \
ver4_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
- bigram_dict_content.cpp \
language_model_dict_content.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \
@@ -132,4 +130,5 @@ LATIN_IME_CORE_TEST_FILES := \
suggest/policyimpl/dictionary/utils/sparse_table_test.cpp \
suggest/policyimpl/dictionary/utils/trie_map_test.cpp \
utils/autocorrection_threshold_utils_test.cpp \
- utils/int_array_view_test.cpp
+ utils/int_array_view_test.cpp \
+ utils/time_keeper_test.cpp
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
deleted file mode 100644
index 08dc107ab..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
-
-#include "suggest/core/dictionary/property/bigram_property.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
- if (outBigramPos) {
- // Lookup target PtNode position.
- *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
- bigramEntry.getTargetTerminalId());
- }
- if (outProbability) {
- if (bigramEntry.hasHistoricalInfo()) {
- *outProbability =
- ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
- mHeaderPolicy);
- } else {
- *outProbability = bigramEntry.getProbability();
- }
- }
- if (outHasNext) {
- *outHasNext = bigramEntry.hasNext();
- }
-}
-
-bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
- // 1. The word has no bigrams yet.
- // 2. The word has bigrams, and there is the target in the list.
- // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
- // 4. The word has bigrams. We have to append new bigram entry to the list.
- // 5. Same as 4, but the list is the last entry of the content file.
- if (outAddedNewEntry) {
- *outAddedNewEntry = false;
- }
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Case 1. PtNode that doesn't have a bigram list.
- // Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
- }
- const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
- newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
- bigramProperty);
- // Write an entry.
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
- &writingPos)) {
- AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
- return false;
- }
- if (!mBigramDictContent->writeTerminator(writingPos)) {
- AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
- return false;
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- return true;
- }
-
- int tailEntryPos = NOT_A_DICT_POS;
- const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
- &tailEntryPos);
- if (entryPosToUpdate == NOT_A_DICT_POS) {
- // Case 4, 5. Add new entry to the bigram list.
- const int contentTailPos = mBigramDictContent->getContentTailPos();
- // If the tail entry is at the tail of content buffer, the new entry can be written without
- // link (Case 5).
- const bool canAppendEntry =
- contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
- const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
- int writingPos = newEntryPos;
- // Write new entry at the tail position of the bigram content.
- const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
- newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, bigramProperty);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
- &writingPos)) {
- AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
- return false;
- }
- if (!mBigramDictContent->writeTerminator(writingPos)) {
- AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
- return false;
- }
- if (!canAppendEntry) {
- // Update link of the current tail entry.
- if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
- AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
- tailEntryPos, newEntryPos);
- return false;
- }
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- return true;
- }
-
- // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
- const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (!originalBigramEntry.isValid()) {
- // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
- // entry is updated.
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- }
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, bigramProperty);
- return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
-}
-
-bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return false;
- }
- const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
- nullptr /* outTailEntryPos */);
- if (entryPosToUpdate == NOT_A_DICT_POS) {
- // Bigram entry doesn't exist.
- return false;
- }
- const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
- // Bigram entry doesn't exist.
- return false;
- }
- // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
-}
-
-bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
- int *const outBigramCount) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return true;
- }
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
- hasNext = bigramEntry.hasNext();
- if (!bigramEntry.isValid()) {
- continue;
- }
- const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
- bigramEntry.getTargetTerminalId());
- if (targetPtNodePos == NOT_A_DICT_POS) {
- // Invalidate bigram entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- } else if (bigramEntry.hasHistoricalInfo()) {
- const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
- bigramEntry.getHistoricalInfo(), mHeaderPolicy);
- if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
- const BigramEntry updatedBigramEntry =
- bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- *outBigramCount += 1;
- } else {
- // Remove entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- }
- } else {
- *outBigramCount += 1;
- }
- }
- return true;
-}
-
-int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return 0;
- }
- int bigramCount = 0;
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (bigramEntry.isValid()) {
- bigramCount++;
- }
- }
- return bigramCount;
-}
-
-int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
- const int bigramListPos, int *const outTailEntryPos) const {
- if (outTailEntryPos) {
- *outTailEntryPos = NOT_A_DICT_POS;
- }
- int invalidEntryPos = NOT_A_DICT_POS;
- int readingPos = bigramListPos;
- while (true) {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
- if (!bigramEntry.hasNext()) {
- if (outTailEntryPos) {
- *outTailEntryPos = entryPos;
- }
- break;
- }
- if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
- // Entry with same target is found.
- return entryPos;
- } else if (!bigramEntry.isValid()) {
- // Invalid entry that can be reused is found.
- invalidEntryPos = entryPos;
- }
- }
- return invalidEntryPos;
-}
-
-const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
- const BigramEntry *const originalBigramEntry,
- const BigramProperty *const bigramProperty) const {
- // TODO: Consolidate historical info and probability.
- if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
- bigramProperty->getLevel(), bigramProperty->getCount());
- const HistoricalInfo updatedHistoricalInfo =
- ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
- &historicalInfoForUpdate, mHeaderPolicy);
- return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
- } else {
- return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
deleted file mode 100644
index 4b3bb3725..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
-#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
-
-namespace latinime {
-
-class BigramDictContent;
-class BigramProperty;
-class HeaderPolicy;
-class TerminalPositionLookupTable;
-
-class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
- public:
- Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
- const TerminalPositionLookupTable *const terminalPositionLookupTable,
- const HeaderPolicy *const headerPolicy)
- : mBigramDictContent(bigramDictContent),
- mTerminalPositionLookupTable(terminalPositionLookupTable),
- mHeaderPolicy(headerPolicy) {}
-
- void getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const;
-
- bool skipAllBigrams(int *const pos) const {
- // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
- return true;
- }
-
- bool addNewEntry(const int terminalId, const int newTargetTerminalId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
-
- bool removeEntry(const int terminalId, const int targetTerminalId);
-
- bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
- int *const outBigramCount);
-
- int getBigramEntryConut(const int terminalId);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
-
- int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
- int *const outTailEntryPos) const;
-
- const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
- const BigramProperty *const bigramProperty) const;
-
- BigramDictContent *const mBigramDictContent;
- const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
- const HeaderPolicy *const mHeaderPolicy;
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
deleted file mode 100644
index d7e1952b5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
-
-const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
- int *const bigramEntryPos) const {
- const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
- if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
- AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
- "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
- bigramListBuffer->getTailPosition());
- ASSERT(false);
- return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
- Ver4DictConstants::NOT_A_TERMINAL_ID);
- }
- const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
- int probability = NOT_A_PROBABILITY;
- int timestamp = NOT_A_TIMESTAMP;
- int level = 0;
- int count = 0;
- if (mHasHistoricalInfo) {
- timestamp = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
- level = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
- count = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
- } else {
- probability = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
- }
- const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
- const int targetTerminalId =
- (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
- Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
- if (isLink) {
- const int linkedEntryPos = targetTerminalId;
- if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
- // Bigram list terminator is found.
- return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
- Ver4DictConstants::NOT_A_TERMINAL_ID);
- }
- *bigramEntryPos = linkedEntryPos;
- return getBigramEntryAndAdvancePosition(bigramEntryPos);
- }
- // hasNext is always true because we should continue to read the next entry until the terminator
- // is found.
- if (mHasHistoricalInfo) {
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
- } else {
- return BigramEntry(true /* hasNext */, probability, targetTerminalId);
- }
-}
-
-bool BigramDictContent::writeBigramEntryAndAdvancePosition(
- const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
- return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
- bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
- bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
- bigramEntryToWrite->getHistoricalInfo()->getLevel(),
- bigramEntryToWrite->getHistoricalInfo()->getCount(),
- entryWritingPos);
-}
-
-bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
- const bool isLink, const int probability, const int targetTerminalId,
- const int timestamp, const int level, const int count, int *const entryWritingPos) {
- BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
- return false;
- }
- if (mHasHistoricalInfo) {
- if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
- timestamp);
- return false;
- }
- if (!bigramListBuffer->writeUintAndAdvancePosition(level,
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
- level);
- return false;
- }
- if (!bigramListBuffer->writeUintAndAdvancePosition(count,
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
- count);
- return false;
- }
- } else {
- if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
- Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
- probability);
- return false;
- }
- }
- const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
- Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
- if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
- *entryWritingPos, targetTerminalId);
- return false;
- }
- return true;
-}
-
-bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
- const int targetTerminalId = linkedEntryPos;
- int pos = writingPos;
- return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
- NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
- 0 /* count */, &pos);
-}
-
-bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const BigramDictContent *const originalBigramDictContent,
- int *const outBigramEntryCount) {
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const int originalBigramListPos =
- originalBigramDictContent->getBigramListHeadPos(it->first);
- if (originalBigramListPos == NOT_A_DICT_POS) {
- // This terminal does not have a bigram list.
- continue;
- }
- const int bigramListPos = getContentBuffer()->getTailPosition();
- int bigramEntryCount = 0;
- // Copy bigram list with GC from original content.
- if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
- terminalIdMap, &bigramEntryCount)) {
- AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
- originalBigramListPos, bigramListPos);
- return false;
- }
- if (bigramEntryCount == 0) {
- // All bigram entries are useless. This terminal does not have a bigram list.
- continue;
- }
- *outBigramEntryCount += bigramEntryCount;
- // Set bigram list position to the lookup table.
- if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
- AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
- it->second, bigramListPos);
- return false;
- }
- }
- return true;
-}
-
-// Returns whether GC for the bigram list was succeeded or not.
-bool BigramDictContent::runGCBigramList(const int bigramListPos,
- const BigramDictContent *const sourceBigramDictContent, const int toPos,
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- int *const outEntryCount) {
- bool hasNext = true;
- int readingPos = bigramListPos;
- int writingPos = toPos;
- while (hasNext) {
- const BigramEntry originalBigramEntry =
- sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = originalBigramEntry.hasNext();
- if (!originalBigramEntry.isValid()) {
- continue;
- }
- TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
- terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
- if (it == terminalIdMap->end()) {
- // Target word has been removed.
- continue;
- }
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
- if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
- AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
- return false;
- }
- *outEntryCount += 1;
- }
- if (*outEntryCount > 0) {
- if (!writeTerminator(writingPos)) {
- AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
deleted file mode 100644
index 20bae5943..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
-#define LATINIME_BIGRAM_DICT_CONTENT_H
-
-#include <cstdio>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-
-namespace latinime {
-
-class ReadWriteByteArrayView;
-
-class BigramDictContent : public SparseTableDictContent {
- public:
- BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
- : SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
- mHasHistoricalInfo(hasHistoricalInfo) {}
-
- BigramDictContent(const bool hasHistoricalInfo)
- : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
- mHasHistoricalInfo(hasHistoricalInfo) {}
-
- int getContentTailPos() const {
- return getContentBuffer()->getTailPosition();
- }
-
- const BigramEntry getBigramEntry(const int bigramEntryPos) const {
- int readingPos = bigramEntryPos;
- return getBigramEntryAndAdvancePosition(&readingPos);
- }
-
- const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
-
- // Returns head position of bigram list for a PtNode specified by terminalId.
- int getBigramListHeadPos(const int terminalId) const {
- const SparseTable *const addressLookupTable = getAddressLookupTable();
- if (!addressLookupTable->contains(terminalId)) {
- return NOT_A_DICT_POS;
- }
- return addressLookupTable->get(terminalId);
- }
-
- bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
- int writingPos = getContentBuffer()->getTailPosition();
- return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
- }
-
- bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
- int writingPos = entryWritingPos;
- return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
- }
-
- bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
- int *const entryWritingPos);
-
- bool writeTerminator(const int writingPos) {
- // Terminator is a link to the invalid position.
- return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
- }
-
- bool writeLink(const int linkedPos, const int writingPos);
-
- bool createNewBigramList(const int terminalId) {
- const int bigramListPos = getContentBuffer()->getTailPosition();
- return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
- }
-
- bool flushToFile(FILE *const file) const {
- return flush(file);
- }
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const BigramDictContent *const originalBigramDictContent,
- int *const outBigramEntryCount);
-
- int getBigramEntrySize() const {
- if (mHasHistoricalInfo) {
- return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
- + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
- + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
- + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
- + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
- } else {
- return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE
- + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
- }
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
-
- static const int INVALID_LINKED_ENTRY_POS;
-
- bool writeBigramEntryAttributesAndAdvancePosition(
- const bool isLink, const int probability, const int targetTerminalId,
- const int timestamp, const int level, const int count, int *const entryWritingPos);
-
- bool runGCBigramList(const int bigramListPos,
- const BigramDictContent *const sourceBigramDictContent, const int toPos,
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- int *const outEntryCount);
-
- bool mHasHistoricalInfo;
-};
-} // namespace latinime
-#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
deleted file mode 100644
index 2b0cbd93b..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_ENTRY_H
-#define LATINIME_BIGRAM_ENTRY_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
-
-namespace latinime {
-
-class BigramEntry {
- public:
- BigramEntry(const BigramEntry& bigramEntry)
- : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
- mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
-
- // Entry with historical information.
- BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
- : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
- mTargetTerminalId(targetTerminalId) {}
-
- // Entry with historical information.
- BigramEntry(const bool hasNext, const int probability,
- const HistoricalInfo *const historicalInfo, const int targetTerminalId)
- : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
- mTargetTerminalId(targetTerminalId) {}
-
- const BigramEntry getInvalidatedEntry() const {
- return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
- }
-
- const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
- return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
- }
-
- const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
- return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
- }
-
- const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
- return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
- }
-
- const BigramEntry updateHistoricalInfoAndGetEntry(
- const HistoricalInfo *const historicalInfo) const {
- return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
- }
-
- bool isValid() const {
- return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
- }
-
- bool hasNext() const {
- return mHasNext;
- }
-
- int getProbability() const {
- return mProbability;
- }
-
- bool hasHistoricalInfo() const {
- return mHistoricalInfo.isValid();
- }
-
- const HistoricalInfo *getHistoricalInfo() const {
- return &mHistoricalInfo;
- }
-
- int getTargetTerminalId() const {
- return mTargetTerminalId;
- }
-
- private:
- // Copy constructor is public to use this class as a type of return value.
- DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
- DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
-
- const bool mHasNext;
- const int mProbability;
- const HistoricalInfo mHistoricalInfo;
- const int mTargetTerminalId;
-};
-} // namespace latinime
-#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index ea2d24e67..d5749e9eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -23,6 +23,9 @@
namespace latinime {
+const int LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE = 0;
+const int LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE = 1;
+
bool LanguageModelDictContent::save(FILE *const file) const {
return mTrieMap.save(file);
}
@@ -71,13 +74,22 @@ bool LanguageModelDictContent::removeNgramProbabilityEntry(const WordIdArrayView
return mTrieMap.remove(wordId, bitmapEntryIndex);
}
+LanguageModelDictContent::EntryRange LanguageModelDictContent::getProbabilityEntries(
+ const WordIdArrayView prevWordIds) const {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ return EntryRange(mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex), mHasHistoricalInfo);
+}
+
bool LanguageModelDictContent::truncateEntries(const int *const entryCounts,
- const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy) {
+ const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy,
+ int *const outEntryCounts) {
for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) {
if (entryCounts[i] <= maxEntryCounts[i]) {
+ outEntryCounts[i] = entryCounts[i];
continue;
}
- if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i)) {
+ if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i,
+ &outEntryCounts[i])) {
return false;
}
}
@@ -179,7 +191,8 @@ bool LanguageModelDictContent::updateAllProbabilityEntriesInner(const int bitmap
}
bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
- const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel) {
+ const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel,
+ int *const outEntryCount) {
std::vector<int> prevWordIds;
std::vector<EntryInfoToTurncate> entryInfoVector;
if (!getEntryInfo(headerPolicy, targetLevel, mTrieMap.getRootBitmapEntryIndex(),
@@ -187,8 +200,10 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
return false;
}
if (static_cast<int>(entryInfoVector.size()) <= maxEntryCount) {
+ *outEntryCount = static_cast<int>(entryInfoVector.size());
return true;
}
+ *outEntryCount = maxEntryCount;
const int entryCountToRemove = static_cast<int>(entryInfoVector.size()) - maxEntryCount;
std::partial_sort(entryInfoVector.begin(), entryInfoVector.begin() + entryCountToRemove,
entryInfoVector.end(),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 43b2aab66..aa612e35a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -39,6 +39,78 @@ class HeaderPolicy;
*/
class LanguageModelDictContent {
public:
+ static const int UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE;
+ static const int BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE;
+
+ // Pair of word id and probability entry used for iteration.
+ class WordIdAndProbabilityEntry {
+ public:
+ WordIdAndProbabilityEntry(const int wordId, const ProbabilityEntry &probabilityEntry)
+ : mWordId(wordId), mProbabilityEntry(probabilityEntry) {}
+
+ int getWordId() const { return mWordId; }
+ const ProbabilityEntry getProbabilityEntry() const { return mProbabilityEntry; }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(WordIdAndProbabilityEntry);
+ DISALLOW_ASSIGNMENT_OPERATOR(WordIdAndProbabilityEntry);
+
+ const int mWordId;
+ const ProbabilityEntry mProbabilityEntry;
+ };
+
+ // Iterator that wraps a TrieMapIterator and yields decoded WordIdAndProbabilityEntry values.
+ class EntryIterator {
+ public:
+ EntryIterator(const TrieMap::TrieMapIterator &trieMapIterator,
+ const bool hasHistoricalInfo)
+ : mTrieMapIterator(trieMapIterator), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ const WordIdAndProbabilityEntry operator*() const {
+ const TrieMap::TrieMapIterator::IterationResult &result = *mTrieMapIterator;
+ return WordIdAndProbabilityEntry(
+ result.key(), ProbabilityEntry::decode(result.value(), mHasHistoricalInfo));
+ }
+
+ bool operator!=(const EntryIterator &other) const {
+ return mTrieMapIterator != other.mTrieMapIterator;
+ }
+
+ const EntryIterator &operator++() {
+ ++mTrieMapIterator;
+ return *this;
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(EntryIterator);
+ DISALLOW_ASSIGNMENT_OPERATOR(EntryIterator);
+
+ TrieMap::TrieMapIterator mTrieMapIterator;
+ const bool mHasHistoricalInfo;
+ };
+
+ // Class representing a range of entries, for use in range-based for loops.
+ class EntryRange {
+ public:
+ EntryRange(const TrieMap::TrieMapRange trieMapRange, const bool hasHistoricalInfo)
+ : mTrieMapRange(trieMapRange), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ EntryIterator begin() const {
+ return EntryIterator(mTrieMapRange.begin(), mHasHistoricalInfo);
+ }
+
+ EntryIterator end() const {
+ return EntryIterator(mTrieMapRange.end(), mHasHistoricalInfo);
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(EntryRange);
+ DISALLOW_ASSIGNMENT_OPERATOR(EntryRange);
+
+ const TrieMap::TrieMapRange mTrieMapRange;
+ const bool mHasHistoricalInfo;
+ };
+
LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
const bool hasHistoricalInfo)
: mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
@@ -76,6 +148,8 @@ class LanguageModelDictContent {
bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId);
+ EntryRange getProbabilityEntries(const WordIdArrayView prevWordIds) const;
+
bool updateAllProbabilityEntries(const HeaderPolicy *const headerPolicy,
int *const outEntryCounts) {
for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) {
@@ -87,7 +161,7 @@ class LanguageModelDictContent {
// entryCounts should be created by updateAllProbabilityEntries.
bool truncateEntries(const int *const entryCounts, const int *const maxEntryCounts,
- const HeaderPolicy *const headerPolicy);
+ const HeaderPolicy *const headerPolicy, int *const outEntryCounts);
private:
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
@@ -126,7 +200,7 @@ class LanguageModelDictContent {
bool updateAllProbabilityEntriesInner(const int bitmapEntryIndex, const int level,
const HeaderPolicy *const headerPolicy, int *const outEntryCounts);
bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy,
- const int maxEntryCount, const int targetLevel);
+ const int maxEntryCount, const int targetLevel, int *const outEntryCount);
bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel,
const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
std::vector<EntryInfoToTurncate> *const outEntryInfo) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 1f40e3dd2..45f88e9b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -159,11 +159,6 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
AKLOGE("Language model dict content cannot be written.");
return false;
}
- // Write bigram dict content.
- if (!mBigramDictContent.flushToFile(file)) {
- AKLOGE("Bigram dict content cannot be written.");
- return false;
- }
// Write shortcut dict content.
if (!mShortcutDictContent.flushToFile(file)) {
AKLOGE("Shortcut dict content cannot be written.");
@@ -186,8 +181,6 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
- mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
- mHeaderPolicy.hasHistoricalInfoOfWords()),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {}
@@ -196,7 +189,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
- mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
- mIsUpdatable(true) {}
+ mShortcutDictContent(), mIsUpdatable(true) {}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 70a7983f1..5407525af 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -22,7 +22,6 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
@@ -53,7 +52,6 @@ class Ver4DictBuffers {
return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit()
|| mLanguageModelDictContent.isNearSizeLimit()
- || mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit();
}
@@ -89,14 +87,6 @@ class Ver4DictBuffers {
return &mLanguageModelDictContent;
}
- AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
- return &mBigramDictContent;
- }
-
- AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
- return &mBigramDictContent;
- }
-
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
return &mShortcutDictContent;
}
@@ -135,7 +125,6 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
LanguageModelDictContent mLanguageModelDictContent;
- BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index b085a6661..9acf2d44f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -29,20 +29,18 @@ const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
-// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
+// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for shortcut.
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+ NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
- + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
+ + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
-const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
- LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
- BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
+ LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
@@ -56,21 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
-const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
-const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
-const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
-// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
-// invalid terminal ID in bigram lists.
-const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
- (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
-const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
-const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
-const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
-const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
-
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 230b3052d..97035311e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -52,19 +52,9 @@ class Ver4DictConstants {
// Flags in probability entry.
static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
- static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
- static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
- static const int BIGRAM_FLAGS_FIELD_SIZE;
- static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
- static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
- static const int BIGRAM_IS_LINK_MASK;
- static const int BIGRAM_PROBABILITY_MASK;
- // Used when bigram list has time stamp.
- static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
-
static const int SHORTCUT_FLAGS_FIELD_SIZE;
static const int SHORTCUT_PROBABILITY_MASK;
static const int SHORTCUT_HAS_NEXT_MASK;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index b7c31bf75..9ca712470 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -21,7 +21,6 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
@@ -230,12 +229,6 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
if (!probabilityEntry.isValid() && outAddedNewBigram) {
*outAddedNewBigram = true;
}
- // TODO: Remove.
- if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
- AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
- prevWordIds[0], wordId);
- return false;
- }
return true;
}
@@ -244,19 +237,15 @@ bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWord
// TODO: Support n-gram.
LanguageModelDictContent *const languageModelDictContent =
mBuffers->getMutableLanguageModelDictContent();
- if (!languageModelDictContent->removeNgramProbabilityEntry(prevWordIds.limit(1 /* maxSize */),
- wordId)) {
- // TODO: Uncomment.
- // return false;
- }
- // TODO: Remove.
- return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
+ return languageModelDictContent->removeNgramProbabilityEntry(prevWordIds.limit(1 /* maxSize */),
+ wordId);
}
+// TODO: Remove when we stop supporting v402 format.
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
- return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
- sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
+ // Do nothing.
+ return true;
}
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
@@ -291,12 +280,6 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
return false;
}
-
- // Counts bigram entries.
- if (outBigramEntryCount) {
- *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
- toBeUpdatedPtNodeParams->getTerminalId());
- }
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 5d73b6ea3..08b7d3825 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -27,7 +27,6 @@ namespace latinime {
class BufferWithExtendableBuffer;
class HeaderPolicy;
-class Ver4BigramListPolicy;
class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader;
class Ver4PtNodeArrayReader;
@@ -42,10 +41,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
const PtNodeReader *const ptNodeReader,
const PtNodeArrayReader *const ptNodeArrayReader,
- Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+ Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
- mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
- mShortcutPolicy(shortcutPolicy) {}
+ mReadingHelper(ptNodeReader, ptNodeArrayReader), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -114,7 +112,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
DynamicPtReadingHelper mReadingHelper;
- Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 5eb2d3fe8..ae3208cfe 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -159,11 +159,21 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod
if (!prevWordsPtNodePos) {
return;
}
- const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
- BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
+ // TODO: Support n-gram.
+ const PtNodeParams ptNodeParams =
+ mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordsPtNodePos[0]);
+ const int prevWordId = ptNodeParams.getTerminalId();
+ const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&prevWordId);
+ const auto languageModelDictContent = mBuffers->getLanguageModelDictContent();
+ for (const auto entry : languageModelDictContent->getProbabilityEntries(prevWordIds)) {
+ const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(
+ probabilityEntry.getHistoricalInfo(), mHeaderPolicy) :
+ probabilityEntry.getProbability();
+ const int ptNodePos = mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(
+ entry.getWordId());
+ listener->onVisitEntry(probability, ptNodePos);
}
}
@@ -179,18 +189,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return mBuffers->getBigramDictContent()->getBigramListHeadPos(
- ptNodeParams.getTerminalId());
-}
-
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) {
@@ -471,41 +469,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
+ // TODO: Support n-gram.
std::vector<BigramProperty> bigrams;
- const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
- if (bigramListPos != NOT_A_DICT_POS) {
- int bigramWord1CodePoints[MAX_WORD_LENGTH];
- const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
- const TerminalPositionLookupTable *const terminalPositionLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- const int word1TerminalId = bigramEntry.getTargetTerminalId();
- const int word1TerminalPtNodePos =
- terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
- if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
- continue;
- }
- // Word (unigram) probability
- int word1Probability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
- &word1Probability);
- const std::vector<int> word1(bigramWord1CodePoints,
- bigramWord1CodePoints + codePointCount);
- const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
- const int probability = bigramEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(
- bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
- bigramEntry.getProbability();
- bigrams.emplace_back(&word1, probability,
- historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount());
- }
+ const int wordId = ptNodeParams.getTerminalId();
+ const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
+ const TerminalPositionLookupTable *const terminalPositionLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
+ prevWordIds)) {
+ const int word1TerminalPtNodePos =
+ terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
+ // Word (unigram) probability
+ int word1Probability = NOT_A_PROBABILITY;
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ const std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + codePointCount);
+ const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
+ const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
+ probabilityEntry.getProbability();
+ bigrams.emplace_back(&word1, probability,
+ historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+ historicalInfo->getCount());
}
// Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index e46803ffe..90e06c7f9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -23,7 +23,6 @@
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
-#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
@@ -43,14 +42,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
: mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
mDictBuffer(mBuffers->getWritableTrieBuffer()),
- mBigramPolicy(mBuffers->getMutableBigramDictContent(),
- mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
- &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
+ &mPtNodeArrayReader, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
@@ -133,7 +130,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mDictBuffer;
- Ver4BigramListPolicy mBigramPolicy;
Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader;
Ver4PtNodeArrayReader mPtNodeArrayReader;
@@ -144,8 +140,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
-
- int getBigramsPositionOfPtNode(const int ptNodePos) const;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index d53575aa7..63e43a544 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -20,7 +20,6 @@
#include <queue>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -77,13 +76,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
- Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
- mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
- mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
- &shortcutPolicy);
+ mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
int entryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntries(headerPolicy,
@@ -93,14 +89,16 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
}
if (headerPolicy->isDecayingDict()) {
int maxEntryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
- maxEntryCountTable[0] = headerPolicy->getMaxUnigramCount();
- maxEntryCountTable[1] = headerPolicy->getMaxBigramCount();
+ maxEntryCountTable[LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE] =
+ headerPolicy->getMaxUnigramCount();
+ maxEntryCountTable[LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE] =
+ headerPolicy->getMaxBigramCount();
for (size_t i = 2; i < NELEMS(maxEntryCountTable); ++i) {
// TODO: Have max n-gram count.
maxEntryCountTable[i] = headerPolicy->getMaxBigramCount();
}
if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(entryCountTable,
- maxEntryCountTable, headerPolicy)) {
+ maxEntryCountTable, headerPolicy, entryCountTable)) {
AKLOGE("Failed to truncate entries in language model dict content.");
return false;
}
@@ -116,16 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
- const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount();
- const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
- if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
- if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
- AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
- maxUnigramCount);
- return false;
- }
- }
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
@@ -134,21 +122,12 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
- const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
- const int maxBigramCount = headerPolicy->getMaxBigramCount();
- if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
- if (!truncateBigrams(maxBigramCount)) {
- AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
- return false;
- }
- }
// Mapping from positions in mBuffer to positions in bufferToWrite.
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
- &shortcutPolicy);
+ buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@@ -161,12 +140,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
- Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
- buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+ buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader,
&newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
@@ -179,11 +156,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
return false;
}
- // Run GC for bigram dict content.
- if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
- mBuffers->getBigramDictContent(), outBigramCount)) {
- return false;
- }
// Run GC for shortcut dict content.
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
mBuffers->getShortcutDictContent())) {
@@ -204,94 +176,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
return false;
}
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
- return true;
-}
-
-// TODO: Remove.
-bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
- const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
- const TerminalPositionLookupTable *const terminalPosLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
- std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
- priorityQueue;
- for (int i = 0; i < nextTerminalId; ++i) {
- const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
- if (terminalPos == NOT_A_DICT_POS) {
- continue;
- }
- const ProbabilityEntry probabilityEntry =
- mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
- const int probability = probabilityEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(
- probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
- probabilityEntry.getProbability();
- priorityQueue.push(DictProbability(terminalPos, probability,
- probabilityEntry.getHistoricalInfo()->getTimeStamp()));
- }
-
- // Delete unigrams.
- while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
- const int ptNodePos = priorityQueue.top().getDictPos();
- priorityQueue.pop();
- const PtNodeParams ptNodeParams =
- ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
- if (ptNodeParams.representsNonWordInfo()) {
- continue;
- }
- if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
- AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
- return false;
- }
- }
- return true;
-}
-
-// TODO: Remove.
-bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
- const TerminalPositionLookupTable *const terminalPosLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
- std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
- priorityQueue;
- BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
- for (int i = 0; i < nextTerminalId; ++i) {
- const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
- if (bigramListPos == NOT_A_DICT_POS) {
- continue;
- }
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- const int entryPos = readingPos - bigramDictContent->getBigramEntrySize();
- hasNext = bigramEntry.hasNext();
- if (!bigramEntry.isValid()) {
- continue;
- }
- const int probability = bigramEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(
- bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
- bigramEntry.getProbability();
- priorityQueue.push(DictProbability(entryPos, probability,
- bigramEntry.getHistoricalInfo()->getTimeStamp()));
- }
- }
-
- // Delete bigrams.
- while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
- const int entryPos = priorityQueue.top().getDictPos();
- const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
- const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
- AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
- return false;
- }
- priorityQueue.pop();
- }
+ *outUnigramCount =
+ entryCountTable[LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE];
+ *outBigramCount =
+ entryCountTable[LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE];
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
index bb464ad28..b6278c4cb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -66,49 +66,6 @@ class Ver4PatriciaTrieWritingHelper {
const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
};
- // For truncateUnigrams() and truncateBigrams().
- class DictProbability {
- public:
- DictProbability(const int dictPos, const int probability, const int timestamp)
- : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
-
- int getDictPos() const {
- return mDictPos;
- }
-
- int getProbability() const {
- return mProbability;
- }
-
- int getTimestamp() const {
- return mTimestamp;
- }
-
- private:
- DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
-
- int mDictPos;
- int mProbability;
- int mTimestamp;
- };
-
- // For truncateUnigrams() and truncateBigrams().
- class DictProbabilityComparator {
- public:
- bool operator()(const DictProbability &left, const DictProbability &right) {
- if (left.getProbability() != right.getProbability()) {
- return left.getProbability() > right.getProbability();
- }
- if (left.getTimestamp() != right.getTimestamp()) {
- return left.getTimestamp() < right.getTimestamp();
- }
- return left.getDictPos() > right.getDictPos();
- }
-
- private:
- DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
- };
-
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index c2aeac211..00765888b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -98,7 +98,7 @@ class TrieMap {
TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
: mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
- if (!trieMap) {
+ if (!trieMap || mBaseBitmapEntryIndex == INVALID_INDEX) {
return;
}
const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
index 3cacba1c3..ca8d56f27 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
@@ -18,6 +18,8 @@
#include <gtest/gtest.h>
+#include <unordered_set>
+
#include "utils/int_array_view.h"
namespace latinime {
@@ -69,5 +71,23 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
EXPECT_TRUE(LanguageModelDictContent.removeProbabilityEntry(wordId));
}
+TEST(LanguageModelDictContentTest, TestIterateProbabilityEntry) { // Checks that iterating getProbabilityEntries() reports every word id that was set, with the stored flags and probability.
+ LanguageModelDictContent languageModelDictContent(false /* useHistoricalInfo */);
+
+ const ProbabilityEntry originalEntry(0xFC, 100); // Presumably (flags, probability) - confirm against ProbabilityEntry's constructor; both values are compared below.
+
+ const int wordIds[] = { 1, 2, 3, 4, 5 };
+ for (const int wordId : wordIds) { // Store the same entry under each of the five word ids.
+ languageModelDictContent.setProbabilityEntry(wordId, &originalEntry);
+ }
+ std::unordered_set<int> wordIdSet(std::begin(wordIds), std::end(wordIds)); // Ids still waiting to be seen; each is erased when the iteration reports it.
+ for (const auto entry : languageModelDictContent.getProbabilityEntries(WordIdArrayView())) { // Default-constructed view - presumably "no previous words"; confirm against getProbabilityEntries().
+ EXPECT_EQ(originalEntry.getFlags(), entry.getProbabilityEntry().getFlags());
+ EXPECT_EQ(originalEntry.getProbability(), entry.getProbabilityEntry().getProbability());
+ wordIdSet.erase(entry.getWordId()); // erase() is a no-op for an id not in the set, so unexpected or repeated ids are not flagged here.
+ }
+ EXPECT_TRUE(wordIdSet.empty()); // Every id that was set must have been visited.
+}
+
} // namespace
} // namespace latinime
diff --git a/native/jni/tests/utils/time_keeper_test.cpp b/native/jni/tests/utils/time_keeper_test.cpp
new file mode 100644
index 000000000..3f54b91f1
--- /dev/null
+++ b/native/jni/tests/utils/time_keeper_test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/time_keeper.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace {
+
+TEST(TimeKeeperTest, TestTestMode) { // Exercises TimeKeeper's test mode: the forced time while it is active, and the reported time after stopTestMode().
+ TimeKeeper::setCurrentTime();
+ const int startTime = TimeKeeper::peekCurrentTime(); // Baseline taken before test mode is started.
+ static const int TEST_CURRENT_TIME = 100;
+ TimeKeeper::startTestModeWithForceCurrentTime(TEST_CURRENT_TIME);
+ EXPECT_EQ(TEST_CURRENT_TIME, TimeKeeper::peekCurrentTime()); // The forced value is what peekCurrentTime() must report.
+ TimeKeeper::setCurrentTime(); // While in test mode this call is expected to leave the forced value in place.
+ EXPECT_EQ(TEST_CURRENT_TIME, TimeKeeper::peekCurrentTime());
+ TimeKeeper::stopTestMode();
+ TimeKeeper::setCurrentTime(); // Called again after leaving test mode, before the final comparison.
+ EXPECT_LE(startTime, TimeKeeper::peekCurrentTime()); // The time reported now must not be earlier than the baseline.
+}
+
+} // namespace
+} // namespace latinime