aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp33
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h13
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary_utils.cpp96
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary_utils.h44
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp4
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/property/unigram_property.h21
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h6
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.cpp7
-rw-r--r--native/jni/src/suggest/core/session/prev_words_info.h66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h)20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp)36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h)37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h)10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h)16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h)16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp)8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp)7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h)20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h)10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h)18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp)16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h)20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp)102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h)40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp)25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h)16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h)14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h4
-rw-r--r--native/jni/src/utils/char_utils.h14
63 files changed, 669 insertions, 371 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index ef03d2b6d..92f39ea25 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -125,7 +125,7 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentDicNode) {
+ void initAsPassingChild(const DicNode *parentDicNode) {
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
const int codePoint =
parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index bf2a0000d..4445f4aaf 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -48,7 +48,7 @@ namespace latinime {
///////////////////////////////////
// Traverse node expansion utils //
///////////////////////////////////
-/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
+/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 0d60e5796..00e80c604 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -35,7 +35,7 @@ class DicNodeUtils {
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
- static void getAllChildDicNodes(DicNode *dicNode,
+ static void getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index cb28e57d8..54cde1988 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -52,7 +52,7 @@ class DicNodeVector {
return static_cast<int>(mDicNodes.size());
}
- void pushPassingChild(DicNode *dicNode) {
+ void pushPassingChild(const DicNode *dicNode) {
ASSERT(!mLock);
mDicNodes.emplace_back();
mDicNodes.back().initAsPassingChild(dicNode);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a61..f88388c75 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -19,6 +19,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "defines.h"
+#include "suggest/core/dictionary/dictionary_utils.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -74,38 +75,50 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
+int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
+ TimeKeeper::setCurrentTime();
+ return DictionaryUtils::getMaxProbabilityOfExactMatches(
+ mDictionaryStructureWithBufferPolicy.get(), word, length);
+}
+
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
int length) const {
TimeKeeper::setCurrentTime();
return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
}
-void Dictionary::addUnigramEntry(const int *const word, const int length,
+bool Dictionary::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
+ if (unigramProperty->representsBeginningOfSentence()
+ && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+ ->supportsBeginningOfSentence()) {
+ AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+ return false;
+ }
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
+ return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}
-void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
}
-void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const int *const word, const int length) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
+ return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
}
-void Dictionary::flush(const char *const filePath) {
+bool Dictionary::flush(const char *const filePath) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->flush(filePath);
+ return mDictionaryStructureWithBufferPolicy->flush(filePath);
}
-void Dictionary::flushWithGC(const char *const filePath) {
+bool Dictionary::flushWithGC(const char *const filePath) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+ return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 817d9f7fc..10010b21c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -57,6 +57,7 @@ class Dictionary {
static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
+ static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
dictionaryStructureWithBufferPolicy);
@@ -72,21 +73,23 @@ class Dictionary {
int getProbability(const int *word, int length) const;
+ int getMaxProbabilityOfExactMatches(const int *word, int length) const;
+
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word, int length) const;
- void addUnigramEntry(const int *const codePoints, const int codePointCount,
+ bool addUnigramEntry(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty);
- void addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);
- void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
const int length);
- void flush(const char *const filePath);
+ bool flush(const char *const filePath);
- void flushWithGC(const char *const filePath);
+ bool flushWithGC(const char *const filePath);
bool needsToRunGC(const bool mindsBlockByGC);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
new file mode 100644
index 000000000..b94966cbe
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/dictionary_utils.h"
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_priority_queue.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const codePoints, const int codePointCount) {
+ std::vector<DicNode> current;
+ std::vector<DicNode> next;
+
+ // No prev words information.
+ PrevWordsInfo emptyPrevWordsInfo;
+ int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy,
+ prevWordsPtNodePos, false /* tryLowerCaseSearch */);
+ current.emplace_back();
+ DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front());
+ for (int i = 0; i < codePointCount; ++i) {
+ // The base-lower input is used to ignore case errors and accent errors.
+ const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
+ for (const DicNode &dicNode : current) {
+ if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
+ next.emplace_back(dicNode);
+ next.back().advanceDigraphIndex();
+ continue;
+ }
+ processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
+ }
+ current.clear();
+ current.swap(next);
+ }
+
+ int maxProbability = NOT_A_PROBABILITY;
+ for (const DicNode &dicNode : current) {
+ if (!dicNode.isTerminalDicNode()) {
+ continue;
+ }
+ // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
+ maxProbability = std::max(maxProbability, dicNode.getProbability());
+ }
+ return maxProbability;
+}
+
+/* static */ void DictionaryUtils::processChildDicNodes(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes) {
+ DicNodeVector childDicNodes;
+ DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
+ for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
+ DicNode *const childDicNode = childDicNodes[childIndex];
+ const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
+ if (inputCodePoint == codePoint) {
+ outDicNodes->emplace_back(*childDicNode);
+ }
+ if (childDicNode->canBeIntentionalOmission()) {
+ processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
+ outDicNodes);
+ }
+ if (DigraphUtils::hasDigraphForCodePoint(
+ dictionaryStructurePolicy->getHeaderStructurePolicy(),
+ childDicNode->getNodeCodePoint())) {
+ childDicNode->advanceDigraphIndex();
+ if (childDicNode->getNodeCodePoint() == codePoint) {
+ childDicNode->advanceDigraphIndex();
+ outDicNodes->emplace_back(*childDicNode);
+ }
+ }
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.h b/native/jni/src/suggest/core/dictionary/dictionary_utils.h
new file mode 100644
index 000000000..358ebf674
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_UTILS_H
+#define LATINIME_DICTIONARY_UTILS_H
+
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicy;
+class DicNode;
+
+class DictionaryUtils {
+ public:
+ static int getMaxProbabilityOfExactMatches(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const codePoints, const int codePointCount);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils);
+
+ static void processChildDicNodes(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index 0635fef7e..b6bf7a98c 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -31,4 +31,8 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+const ErrorTypeUtils::ErrorType
+ ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
+ ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION;
+
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index 0e8e5b635..e3e76b238 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -51,6 +51,11 @@ class ErrorTypeUtils {
return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
}
+ static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes
+ & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
+ }
+
static bool isEditCorrectionError(const ErrorType errorType) {
return (errorType & EDIT_CORRECTION) != 0;
}
@@ -67,6 +72,7 @@ class ErrorTypeUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
};
} // namespace latinime
#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d2551057b..902eb000f 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@ class UnigramProperty {
};
UnigramProperty()
- : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
-
- UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, const int level, const int count,
- const std::vector<ShortcutProperty> *const shortcuts)
- : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+ mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+ mShortcuts() {}
+
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp, const int level,
+ const int count, const std::vector<ShortcutProperty> *const shortcuts)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+ bool representsBeginningOfSentence() const {
+ return mRepresentsBeginningOfSentence;
+ }
+
bool isNotAWord() const {
return mIsNotAWord;
}
@@ -94,6 +100,7 @@ class UnigramProperty {
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
// TODO: Make members const.
+ bool mRepresentsBeginningOfSentence;
bool mIsNotAWord;
bool mIsBlacklisted;
int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629e6..a61227626 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy {
virtual const std::vector<int> *getLocale() const = 0;
+ virtual bool supportsBeginningOfSentence() const = 0;
+
protected:
DictionaryHeaderStructurePolicy() {}
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 3fd815f98..cda89406c 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -81,9 +81,11 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const int *const word, const int length) = 0;
- virtual void flush(const char *const filePath) = 0;
+ // Returns whether the flush was successful or not.
+ virtual bool flush(const char *const filePath) = 0;
- virtual void flushWithGC(const char *const filePath) = 0;
+ // Returns whether the GC and flush were successful or not.
+ virtual bool flushWithGC(const char *const filePath) = 0;
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index a307cb45d..23908255b 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -89,6 +89,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+ const bool isExactMatchWithIntentionalOmission =
+ ErrorTypeUtils::isExactMatchWithIntentionalOmission(
+ terminalDicNode->getContainedErrorTypes());
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and")
@@ -96,7 +99,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
- | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
+ | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
+ | (isExactMatchWithIntentionalOmission ?
+ Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index e4de1f4cc..56c53c1c2 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -20,11 +20,11 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
namespace latinime {
// TODO: Support n-gram.
-// TODO: Support beginning of sentence.
// This class does not take ownership of any code point buffers.
class PrevWordsInfo {
public:
@@ -52,8 +52,7 @@ class PrevWordsInfo {
void getPrevWordsTerminalPtNodePos(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
- int *const outPrevWordsTerminalPtNodePos,
- const bool tryLowerCaseSearch) const {
+ int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
@@ -63,17 +62,11 @@ class PrevWordsInfo {
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
- int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
- mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
- // dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) {
- // If no bigrams for this exact word, search again in lower case.
- pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
- mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
- }
- return BinaryDictionaryBigramsIterator(
- dictStructurePolicy->getBigramsStructurePolicy(), pos);
+ const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
+ dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
+ mIsBeginningOfSentence[0]);
+ return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
+ bigramListPos);
}
// n is 1-indexed.
@@ -99,11 +92,21 @@ class PrevWordsInfo {
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
- if (!dictStructurePolicy || !wordCodePoints) {
+ if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
return NOT_A_DICT_POS;
}
+ int codePoints[MAX_WORD_LENGTH];
+ int codePointCount = wordCodePointCount;
+ memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+ codePointCount, MAX_WORD_LENGTH);
+ if (codePointCount <= 0) {
+ return NOT_A_DICT_POS;
+ }
+ }
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
- wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
+ codePoints, codePointCount, false /* forceLowerCaseSearch */);
if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
// Return the position when the word was found or when lower case search is not
// requested.
@@ -112,7 +115,36 @@ class PrevWordsInfo {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
- wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
+ codePoints, codePointCount, true /* forceLowerCaseSearch */);
+ }
+
+    // Looks up the bigram list position for a previous word, retrying in lower case.
+    //
+    // The word is copied into a local buffer. When isBeginningOfSentence is set,
+    // CharUtils::attachBeginningOfSentenceMarker() is applied to that copy before
+    // searching. The word is searched as given first and, only if that yields
+    // NOT_A_DICT_POS, searched again with forceLowerCaseSearch enabled.
+    //
+    // Returns NOT_A_DICT_POS when dictStructurePolicy or wordCodePoints is null, when
+    // wordCodePointCount is outside [0, MAX_WORD_LENGTH], when attaching the marker
+    // yields a non-positive length, or when neither search finds a bigram list.
+    static int getBigramListPositionForWordWithTryingLowerCaseSearch(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence) {
+        if (!dictStructurePolicy || !wordCodePoints) {
+            return NOT_A_DICT_POS;
+        }
+        // A negative count would be converted to a huge size_t in the copy below and
+        // overflow the local buffer, so reject it together with over-long words.
+        if (wordCodePointCount < 0 || wordCodePointCount > MAX_WORD_LENGTH) {
+            return NOT_A_DICT_POS;
+        }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                codePointCount, false /* forceLowerCaseSearch */);
+        // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
+        // dictionary or has no bigrams
+        if (NOT_A_DICT_POS == pos) {
+            // If no bigrams for this exact word, search again in lower case.
+            pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                    codePointCount, true /* forceLowerCaseSearch */);
+        }
+        return pos;
+    }
static int getBigramListPositionForWord(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d15164..87cf0cd3b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return &mLocale;
}
+ bool supportsBeginningOfSentence() const { // True when mDictFormatVersion is at least VERSION_4.
+ return mDictFormatVersion >= FormatUtils::VERSION_4;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
index 9e29e836c..9e29e836c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
index 7ad072f09..3e8e059f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
@@ -22,19 +22,19 @@
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
@@ -285,6 +285,6 @@ bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigra
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
index adf687bac..61623468e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
@@ -22,28 +22,28 @@
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
-#define LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
+#ifndef LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class BigramDictContent;
-} // namespace v401
+} // namespace v402
} // namespace backward
class BigramProperty;
namespace backward {
-namespace v401 {
-} // namespace v401
+namespace v402 {
+} // namespace v402
} // namespace backward
class HeaderPolicy;
namespace backward {
-namespace v401 {
+namespace v402 {
class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
@@ -87,7 +87,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
const HeaderPolicy *const mHeaderPolicy;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
index 1e53ff94a..e2dd93c5e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
@@ -21,20 +21,22 @@
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) {
- AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d",
- *bigramEntryPos, bigramListBuffer->getTailPosition());
+ const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
+ if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
+ AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+ "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
+ bigramListBuffer->getTailPosition());
ASSERT(false);
return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
Ver4DictConstants::NOT_A_TERMINAL_ID);
@@ -47,8 +49,6 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int level = 0;
int count = 0;
if (mHasHistoricalInfo) {
- probability = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition(
@@ -56,7 +56,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else {
- probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+ probability = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
}
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
@@ -74,21 +75,13 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = createAndGetBigramFlags(
- mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
- bigramEntryToWrite->hasNext());
+ const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
- bigramEntryToWrite->getProbability());
- return false;
- }
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
@@ -108,6 +101,13 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
historicalInfo->getCount());
return false;
}
+ } else {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+ Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+ bigramEntryToWrite->getProbability());
+ return false;
+ }
}
const int targetTerminalIdToWrite =
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
@@ -219,6 +219,6 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
index f9c474b4a..b554e5676 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
@@ -21,18 +21,18 @@
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class BigramDictContent : public SparseTableDictContent {
public:
@@ -104,9 +104,22 @@ class BigramDictContent : public SparseTableDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
- int createAndGetBigramFlags(const int probability, const bool hasNext) const {
- return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
- | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
+ int createAndGetBigramFlags(const bool hasNext) const { // Flags now encode only the has-next bit.
+ return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
+ }
+
+ int getBigramEntrySize() const { // Sum of the field sizes that make up one bigram entry.
+ if (mHasHistoricalInfo) { // Fields: flags, time stamp, level, count, target terminal id.
+ return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+ + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
+ + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ } else { // Fields: flags, probability, target terminal id.
+ return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE
+ + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ }
+ }
bool runGCBigramList(const int bigramListPos,
@@ -116,7 +129,7 @@ class BigramDictContent : public SparseTableDictContent {
bool mHasHistoricalInfo;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
index 82c4b53a8..40968b4d8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
@@ -21,16 +21,16 @@
* suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
*/
-#ifndef LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
-#define LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
+#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class BigramEntry {
public:
@@ -104,7 +104,7 @@ class BigramEntry {
const HistoricalInfo mHistoricalInfo;
const int mTargetTerminalId;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H */
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
index 39e29001c..0f2f25534 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
@@ -21,14 +21,14 @@
* suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_DICT_CONTENT_H
#include "defines.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class DictContent {
public:
@@ -41,7 +41,7 @@ class DictContent {
private:
DISALLOW_COPY_AND_ASSIGN(DictContent);
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
index 337b97c05..c671647d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -21,16 +21,16 @@
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
@@ -166,6 +166,6 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
index db3070994..3734797d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
@@ -21,18 +21,18 @@
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class ProbabilityEntry;
@@ -68,7 +68,7 @@ class ProbabilityDictContent : public SingleDictContent {
bool mHasHistoricalInfo;
int mSize;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
index d341e7b07..8ccfa33dc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
@@ -21,16 +21,16 @@
* suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
*/
-#ifndef LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
-#define LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class ProbabilityEntry {
public:
@@ -84,7 +84,7 @@ class ProbabilityEntry {
const int mProbability;
const HistoricalInfo mHistoricalInfo;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H */
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
index 3214807ad..56bc8b98d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
@@ -21,13 +21,13 @@
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
@@ -194,6 +194,6 @@ int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
| (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0);
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
index 75fd4f3b2..179cec5bb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
@@ -21,17 +21,17 @@
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class ShortcutDictContent : public SparseTableDictContent {
public:
@@ -95,7 +95,7 @@ class ShortcutDictContent : public SparseTableDictContent {
int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
index a519cd835..6433650b0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
@@ -21,19 +21,19 @@
* suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class SingleDictContent : public DictContent {
public:
@@ -80,7 +80,7 @@ class SingleDictContent : public DictContent {
BufferWithExtendableBuffer mExpandableContentBuffer;
const bool mIsValid;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
index 638132c3d..7c9b4967a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
@@ -21,11 +21,11 @@
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
bool SparseTableDictContent::flush(const char *const dictPath,
const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
@@ -45,6 +45,6 @@ bool SparseTableDictContent::flush(const char *const dictPath,
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
index b95de2eda..c7233edd3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
@@ -21,12 +21,12 @@
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
*/
-#ifndef LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
-#define LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
+#ifndef LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@@ -34,7 +34,7 @@
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
// TODO: Support multiple contents.
class SparseTableDictContent : public DictContent {
@@ -116,7 +116,7 @@ class SparseTableDictContent : public DictContent {
SparseTable mAddressLookupTable;
const bool mIsValid;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H */
+#endif /* LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
index ab8a3ae43..a9f841779 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
@@ -21,14 +21,14 @@
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
@@ -106,6 +106,6 @@ bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminal
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
index dbf0e6088..eadfe0faa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
@@ -21,18 +21,18 @@
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
*/
-#ifndef LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
-#define LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
+#ifndef LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
#include <unordered_map>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class TerminalPositionLookupTable : public SingleDictContent {
public:
@@ -67,7 +67,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
int mSize;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif // LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
+#endif // LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
index 6a4e83c0d..941fda748 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
@@ -22,18 +22,18 @@
* suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
-#define LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
+#ifndef LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public:
@@ -112,7 +112,7 @@ class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
ShortcutDictContent *const mShortcutDictContent;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif // LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
+#endif // LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
index 55ead01a9..93f192976 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
@@ -21,7 +21,7 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
#include <cerrno>
#include <cstring>
@@ -33,7 +33,7 @@
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
@@ -65,6 +65,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
return false;
}
}
+ umask(S_IWGRP | S_IWOTH);
if (mkdir(tmpDirPath, S_IRWXU) == -1) {
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
return false;
@@ -150,6 +151,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
index 716ed931b..e775be52e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
@@ -21,24 +21,24 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
-#define LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
#include <memory>
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4DictBuffers {
public:
@@ -146,7 +146,7 @@ class Ver4DictBuffers {
ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
index 793b44ed4..81d85f495 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
@@ -21,11 +21,11 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
// These values MUST match the definitions in FormatSpec.java.
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
@@ -76,6 +76,6 @@ const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
index 17afeb156..88ebd6a75 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
@@ -21,14 +21,14 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
-#define LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
#include "defines.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
// TODO: Create PtConstants under the pt_common and move some constant values there.
// Note that there are corresponding definitions in FormatSpec.java.
@@ -78,7 +78,7 @@ class Ver4DictConstants {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
index 80b51b292..82399f190 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
@@ -21,19 +21,19 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int siblingNodePos) const {
@@ -104,6 +104,6 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
}
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
index 0531b0a29..4032a67fa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
@@ -21,8 +21,8 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
@@ -30,18 +30,18 @@
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
-namespace v401 {
-} // namespace v401
+namespace v402 {
+} // namespace v402
} // namespace backward
class HeaderPolicy;
namespace backward {
-namespace v401 {
+namespace v402 {
class ProbabilityDictContent;
/*
@@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos) const;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 8de6bacfc..4220a9561 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -21,24 +21,24 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
@@ -424,6 +424,6 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index 7f1851d63..08226ea26 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -21,29 +21,29 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
-#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
-namespace v401 {
-} // namespace v401
+namespace v402 {
+} // namespace v402
} // namespace backward
class HeaderPolicy;
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4BigramListPolicy;
class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader;
@@ -139,7 +139,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 97e1120a3..e571d8986 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -22,7 +22,7 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
#include <vector>
@@ -33,13 +33,13 @@
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/session/prev_words_info.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
// BinaryDictionaryDecayingTests.
@@ -70,13 +70,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// valid terminal DicNode.
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
+ readingHelper.readNextSiblingNode(ptNodeParams);
+ if (ptNodeParams.representsNonWordInfo()) {
+ // Skip PtNodes that represent non-word information.
+ continue;
+ }
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
- readingHelper.readNextSiblingNode(ptNodeParams);
}
if (readingHelper.isError()) {
mIsCorrupted = true;
@@ -122,9 +126,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
} else if (bigramProbability == NOT_A_PROBABILITY) {
return ProbabilityUtils::backoff(unigramProbability);
} else {
- // bigramProbability is a bigram probability delta.
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
+ return bigramProbability;
}
}
}
@@ -189,9 +191,19 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
- if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
+ int codePointsToAdd[MAX_WORD_LENGTH];
+ int codePointCountToAdd = length;
+ memmove(codePointsToAdd, word, sizeof(int) * length);
+ if (unigramProperty->representsBeginningOfSentence()) {
+ codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+ codePointCountToAdd, MAX_WORD_LENGTH);
+ }
+ if (codePointCountToAdd <= 0) {
+ return false;
+ }
+ if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
unigramProperty, &addedNewUnigram)) {
- if (addedNewUnigram) {
+ if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
mUnigramCount++;
}
if (unigramProperty->getShortcuts().size() > 0) {
@@ -221,8 +233,6 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) {
- const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1);
- const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1);
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -232,15 +242,20 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
mDictBuffer->getTailPosition());
return false;
}
- if (length0 > MAX_WORD_LENGTH
- || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
- AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
- "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size());
+ if (!prevWordsInfo->isValid()) {
+ AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
return false;
}
- const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
+ if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+ AKLOGE("The word is too long to insert the ngram to the dictionary. "
+ "length: %d", bigramProperty->getTargetCodePoints()->size());
+ return false;
+ }
+ int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+ false /* tryLowerCaseSearch */);
+ // TODO: Support N-gram.
+ if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalPtNodePositionOfWord(
@@ -250,7 +265,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
+ if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
+ &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -261,11 +277,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const int *const word1, const int length1) {
- const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1);
- const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1);
+ const int *const word, const int length) {
if (!mBuffers->isUpdatable()) {
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
@@ -273,22 +287,26 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
mDictBuffer->getTailPosition());
return false;
}
- if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
- AKLOGE("Either src word or target word is too long to remove the bigram to from the "
- "dictionary. length0: %d, length1: %d", length0, length1);
+ if (!prevWordsInfo->isValid()) {
+ AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
return false;
}
- const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
+    // Reject over-long words before any lookup. The pre-change code returned false for this
+    // case and addNgramEntry() still does; logging alone would let the removal proceed.
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
+        return false;
+    }
+ int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+ false /* tryLowerCaseSearch */);
+ // TODO: Support N-gram.
+ if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ const int wordPos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
+ if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
mBigramCount--;
return true;
} else {
@@ -296,26 +314,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
-void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
- return;
+ return false;
}
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
+ return false;
}
+ return true;
}
-void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
- return;
+ return false;
}
if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
AKLOGE("Cannot flush the dictionary to file with GC.");
mIsCorrupted = true;
+ return false;
}
+ return true;
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -409,7 +431,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
- getProbability(word1Probability, bigramEntry.getProbability());
+ bigramEntry.getProbability();
bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount());
@@ -432,8 +454,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
@@ -475,6 +497,6 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
return nextToken;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 95813881d..e323652d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -22,8 +22,8 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
-#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
#include <vector>
@@ -31,29 +31,29 @@
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class DicNode;
namespace backward {
-namespace v401 {
-} // namespace v401
+namespace v402 {
+} // namespace v402
} // namespace backward
class DicNodeVector;
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
@@ -114,12 +114,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
- const int length);
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
+ const int length1);
- void flush(const char *const filePath);
+ bool flush(const char *const filePath);
- void flushWithGC(const char *const filePath);
+ bool flushWithGC(const char *const filePath);
bool needsToRunGC(const bool mindsBlockByGC) const;
@@ -162,7 +162,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif // LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
+#endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
index 6cc36fbef..80d531198 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
@@ -21,19 +21,19 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
const uint8_t *const buffer, int *pos) {
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
index 7417c261e..3579c26d6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
@@ -21,8 +21,8 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
-#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
#include <cstdint>
@@ -30,13 +30,13 @@
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4PatriciaTrieReadingUtils {
public:
@@ -46,7 +46,7 @@ class Ver4PatriciaTrieReadingUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
index 10f27beb7..99eed0f67 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -21,26 +21,26 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
#include <cstring>
#include <queue>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
const int unigramCount, const int bigramCount) const {
@@ -222,13 +222,16 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
// Delete unigrams.
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
const int ptNodePos = priorityQueue.top().getDictPos();
+ priorityQueue.pop();
const PtNodeParams ptNodeParams =
ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.representsNonWordInfo()) {
+ continue;
+ }
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
return false;
}
- priorityQueue.pop();
}
return true;
}
@@ -296,6 +299,6 @@ bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTermi
return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
index be44aaa33..9034ee656 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
@@ -21,22 +21,22 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
-#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class HeaderPolicy;
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader;
class Ver4PatriciaTrieNodeWriter;
@@ -133,8 +133,8 @@ class Ver4PatriciaTrieWritingHelper {
Ver4DictBuffers *const mBuffers;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
index 33e4e55e2..537a6d420 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
@@ -21,7 +21,7 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
*/
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
@@ -29,7 +29,7 @@
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
@@ -85,6 +85,6 @@ bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLin
return true;
}
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
index 3a7eefa44..4f8056801 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
@@ -21,21 +21,21 @@
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
*/
-#ifndef LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
-#define LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
+#ifndef LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
namespace latinime {
namespace backward {
-namespace v401 {
+namespace v402 {
-} // namespace v401
+} // namespace v402
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
-namespace v401 {
+namespace v402 {
class Ver4PtNodeArrayReader : public PtNodeArrayReader {
public:
@@ -51,7 +51,7 @@ class Ver4PtNodeArrayReader : public PtNodeArrayReader {
const BufferWithExtendableBuffer *const mBuffer;
};
-} // namespace v401
+} // namespace v402
} // namespace backward
} // namespace latinime
-#endif /* LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H */
+#endif /* LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index f93d2894c..e4b5fa267 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -19,9 +19,9 @@
#include <climits>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
@@ -58,10 +58,10 @@ namespace latinime {
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
switch (dictFormatVersion) {
case FormatUtils::VERSION_4: {
- return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants,
- backward::v401::Ver4DictBuffers,
- backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
- backward::v401::Ver4PatriciaTriePolicy>(
+ return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v402::Ver4PatriciaTriePolicy>(
dictFormatVersion, locale, attributeMap);
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
@@ -116,10 +116,10 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
break;
case FormatUtils::VERSION_4: {
- return newPolicyForV4Dict<backward::v401::Ver4DictConstants,
- backward::v401::Ver4DictBuffers,
- backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
- backward::v401::Ver4PatriciaTriePolicy>(
+ return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v402::Ver4PatriciaTriePolicy>(
headerFilePath, formatVersion, std::move(mmappedBuffer));
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
index 028e9ecbf..1f00fc6ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners
}
} else {
mValueStack.back() += 1;
- if (ptNodeParams->isTerminal()) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
mValidUnigramCount += 1;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e90..b2e60a837 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,12 @@ class PtNodeParams {
}
AK_FORCE_INLINE bool representsNonWordInfo() const {
- return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ && isNotAWord();
+ }
+
+ AK_FORCE_INLINE int representsBeginningOfSentence() const {
+ return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
&& isNotAWord();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba37..a6a470c4e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 6240d46aa..88bbfd966 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -102,14 +102,16 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- void flush(const char *const filePath) {
+ bool flush(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ return false;
}
- void flushWithGC(const char *const filePath) {
+ bool flushWithGC(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return false;
}
bool needsToRunGC(const bool mindsBlockByGC) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index d53922763..e1ceaee49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -23,9 +23,11 @@ namespace latinime {
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) {
- AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d",
- *bigramEntryPos, bigramListBuffer->getTailPosition());
+ const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
+ if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
+ AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+ "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
+ bigramListBuffer->getTailPosition());
ASSERT(false);
return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
Ver4DictConstants::NOT_A_TERMINAL_ID);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index b8bdb63a8..52447a336 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -99,6 +99,20 @@ class BigramDictContent : public SparseTableDictContent {
return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
}
+ int getBigramEntrySize() const {
+ if (mHasHistoricalInfo) {
+ return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+ + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
+ + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ } else {
+ return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE
+ + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ }
+ }
+
bool runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e44..09c7b7d85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
readingHelper.readNextSiblingNode(ptNodeParams);
- if (!ptNodeParams.representsNonWordInfo()) {
+ if (ptNodeParams.representsNonWordInfo()) {
// Skip PtNodes that represent non-word information.
continue;
}
@@ -181,9 +181,19 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
- if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
+ int codePointsToAdd[MAX_WORD_LENGTH];
+ int codePointCountToAdd = length;
+ memmove(codePointsToAdd, word, sizeof(int) * length);
+ if (unigramProperty->representsBeginningOfSentence()) {
+ codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+ codePointCountToAdd, MAX_WORD_LENGTH);
+ }
+ if (codePointCountToAdd <= 0) {
+ return false;
+ }
+ if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
unigramProperty, &addedNewUnigram)) {
- if (addedNewUnigram) {
+ if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
mUnigramCount++;
}
if (unigramProperty->getShortcuts().size() > 0) {
@@ -294,26 +304,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
-void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
- return;
+ return false;
}
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
+ return false;
}
+ return true;
}
-void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
- return;
+ return false;
}
if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
AKLOGE("Cannot flush the dictionary to file with GC.");
mIsCorrupted = true;
+ return false;
}
+ return true;
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -430,8 +444,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 008f2e423..d198c97fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -99,9 +99,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
const int length1);
- void flush(const char *const filePath);
+ bool flush(const char *const filePath);
- void flushWithGC(const char *const filePath);
+ bool flushWithGC(const char *const filePath);
bool needsToRunGC(const bool mindsBlockByGC) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 105363db5..4da339b0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -23,7 +23,7 @@
#include <sys/types.h>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -42,9 +42,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
switch (formatVersion) {
case FormatUtils::VERSION_4:
- return createEmptyV4DictFile<backward::v401::Ver4DictConstants,
- backward::v401::Ver4DictBuffers,
- backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>(
+ return createEmptyV4DictFile<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index ba405b07e..1916ea560 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -50,7 +50,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
case MAGIC_NUMBER:
- // Version 2 header is as follows:
+ // The layout of the header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
// Dictionary format version number (2 bytes)
// Options (2 bytes)
@@ -58,17 +58,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// Conceptually this converts the hardcoded value of the bytes in the file into
// the symbolic value we use in the code. But we want the constants to be the
// same so we use them for both here.
- if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
- return VERSION_2;
- } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
- return VERSION_4_ONLY_FOR_TESTING;
- } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
- return VERSION_4;
- } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_DEV) {
- return VERSION_4_DEV;
- } else {
- return UNKNOWN_VERSION;
- }
+ return getFormatVersion(ByteArrayUtils::readUint16(dict, 4));
default:
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index c47f30ca4..55ad5799f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,8 +32,8 @@ class FormatUtils {
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
VERSION_4_ONLY_FOR_TESTING = 399,
- VERSION_4 = 401,
- VERSION_4_DEV = 402,
+ VERSION_4 = 402,
+ VERSION_4_DEV = 403,
UNKNOWN_VERSION = -1
};
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 634c45b04..f28ed5682 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -18,6 +18,7 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
+#include <cstring>
#include <vector>
#include "defines.h"
@@ -93,6 +94,19 @@ class CharUtils {
static unsigned short latin_tolower(const unsigned short c);
static const std::vector<int> EMPTY_STRING;
+ // Returns updated code point count. Returns 0 when the code points cannot be marked as a
+ // Beginning-of-Sentence.
+ static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+ const int codePointCount, const int maxCodePoint) {
+ if (codePointCount >= maxCodePoint) {
+ // the code points cannot be marked as a Beginning-of-Sentence.
+ return 0;
+ }
+ memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
+ codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+ return codePointCount + 1;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);