aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk18
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp67
-rw-r--r--native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.cpp47
-rw-r--r--native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.h25
-rw-r--r--native/jni/jni_common.cpp9
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h58
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp52
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h26
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h4
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h7
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp41
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h28
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp49
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp123
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h105
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h124
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h55
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp66
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h123
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp72
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h20
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp13
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h34
-rw-r--r--native/jni/src/suggest/core/dictionary/probability_utils.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h93
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h42
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h50
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h46
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_policy.h79
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h81
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp2
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp23
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h5
-rw-r--r--native/jni/src/suggest/core/suggest.cpp56
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h54
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h130
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp153
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h87
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/binary_format.h470
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h48
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp38
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp273
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h68
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp83
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h199
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h56
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_header.h)41
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp108
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h76
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp353
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h83
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h114
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/byte_array_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h (renamed from native/jni/src/suggest/core/dictionary/byte_array_utils.h)55
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp)21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h)15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h102
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
-rw-r--r--native/jni/src/utils/autocorrection_threshold_utils.cpp7
77 files changed, 3093 insertions, 1935 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index e14cf5a71..0e36f3df9 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -43,6 +43,7 @@ LATIN_IME_JNI_SRC_FILES := \
com_android_inputmethod_keyboard_ProximityInfo.cpp \
com_android_inputmethod_latin_BinaryDictionary.cpp \
com_android_inputmethod_latin_DicTraverseSession.cpp \
+ com_android_inputmethod_latin_makedict_Ver3DictDecoder.cpp \
jni_common.cpp
LATIN_IME_CORE_SRC_FILES := \
@@ -53,12 +54,7 @@ LATIN_IME_CORE_SRC_FILES := \
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \
- binary_dictionary_format_utils.cpp \
- binary_dictionary_header.cpp \
- binary_dictionary_header_reading_utils.cpp \
- binary_dictionary_terminal_attributes_reading_utils.cpp \
bloom_filter.cpp \
- byte_array_utils.cpp \
dictionary.cpp \
digraph_utils.cpp \
multi_bigram_map.cpp) \
@@ -71,11 +67,23 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
+ bigram/bigram_list_read_write_utils.cpp \
+ bigram/dynamic_bigram_list_policy.cpp \
+ header/header_policy.cpp \
+ header/header_reading_utils.cpp \
+ shortcut/shortcut_list_reading_utils.cpp \
+ dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \
+ dynamic_patricia_trie_reading_helper.cpp \
dynamic_patricia_trie_reading_utils.cpp \
+ dynamic_patricia_trie_writing_helper.cpp \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/utils/, \
+ buffer_with_extendable_buffer.cpp \
+ byte_array_utils.cpp \
+ format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 8b46c2644..86c2394d1 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -18,37 +18,20 @@
#include "com_android_inputmethod_latin_BinaryDictionary.h"
-#include <cerrno>
#include <cstring> // for memset()
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/autocorrection_threshold_utils.h"
namespace latinime {
class ProximityInfo;
-// Helper method
-static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
- int ret = munmap(const_cast<void *>(dictBuf), length);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(fd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
-}
-
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
@@ -61,41 +44,16 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
- int fd = 0;
- void *dictBuf = 0;
- int offset = 0;
- const bool updatableMmap = (isUpdatable == JNI_TRUE);
- const int openMode = updatableMmap ? O_RDWR : O_RDONLY;
- fd = open(sourceDirChars, openMode);
- if (fd < 0) {
- AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
- return 0;
- }
- int pagesize = getpagesize();
- offset = static_cast<int>(dictOffset) % pagesize;
- int adjDictOffset = static_cast<int>(dictOffset) - offset;
- int adjDictSize = static_cast<int>(dictSize) + offset;
- const int protMode = updatableMmap ? PROT_READ | PROT_WRITE : PROT_READ;
- dictBuf = mmap(0, adjDictSize, protMode, MAP_PRIVATE, fd, adjDictOffset);
- if (dictBuf == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
+ DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
+ sourceDirChars, static_cast<int>(sourceDirUtf8Length),
+ static_cast<int>(dictOffset), static_cast<int>(dictSize),
+ isUpdatable == JNI_TRUE);
+ if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
- dictBuf = static_cast<char *>(dictBuf) + offset;
- if (!dictBuf) {
- AKLOGE("DICT: dictBuf is null");
- return 0;
- }
- Dictionary *dictionary = 0;
- if (BinaryDictionaryFormatUtils::UNKNOWN_VERSION
- == BinaryDictionaryFormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
- static_cast<int>(dictSize))) {
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
- releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
- } else {
- dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
- updatableMmap);
- }
+
+ Dictionary *const dictionary = new Dictionary(env, dictionaryStructureWithBufferPolicy);
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
@@ -104,13 +62,6 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
- const BinaryDictionaryInfo *const binaryDictionaryInfo = dictionary->getBinaryDictionaryInfo();
- const int dictBufOffset = binaryDictionaryInfo->getDictBufOffset();
- const void *dictBuf = binaryDictionaryInfo->getDictBuf();
- if (!dictBuf) return;
- releaseDictBuf(static_cast<const char *>(dictBuf) - dictBufOffset,
- binaryDictionaryInfo->getDictSize() + dictBufOffset,
- binaryDictionaryInfo->getMmapFd());
delete dictionary;
}
diff --git a/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.cpp b/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.cpp
new file mode 100644
index 000000000..15088b65a
--- /dev/null
+++ b/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LatinIME: jni: Ver3DictDecoder"
+
+#include "com_android_inputmethod_latin_makedict_Ver3DictDecoder.h"
+
+#include "defines.h"
+#include "jni.h"
+#include "jni_common.h"
+
+namespace latinime {
+static int latinime_Ver3DictDecoder_doNothing(JNIEnv *env, jclass clazz) {
+ // This is a phony method for test - it does nothing. It just returns some value
+ // unlikely to be in memory by chance for testing purposes.
+ // TODO: remove this method.
+ return 2097;
+}
+
+static const JNINativeMethod sMethods[] = {
+ {
+ // TODO: remove this entry when we have one useful method in here
+ const_cast<char *>("doNothing"),
+ const_cast<char *>("()I"),
+ reinterpret_cast<void *>(latinime_Ver3DictDecoder_doNothing)
+ },
+};
+
+int register_Ver3DictDecoder(JNIEnv *env) {
+ const char *const kClassPathName =
+ "com/android/inputmethod/latin/makedict/Ver3DictDecoder";
+ return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
+}
+} // namespace latinime
diff --git a/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.h b/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.h
new file mode 100644
index 000000000..07e80f1d8
--- /dev/null
+++ b/native/jni/com_android_inputmethod_latin_makedict_Ver3DictDecoder.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _COM_ANDROID_INPUTMETHOD_LATIN_MAKEDICT_VER3DICTDECODER_H
+#define _COM_ANDROID_INPUTMETHOD_LATIN_MAKEDICT_VER3DICTDECODER_H
+
+#include "jni.h"
+
+namespace latinime {
+int register_Ver3DictDecoder(JNIEnv *env);
+} // namespace latinime
+#endif // _COM_ANDROID_INPUTMETHOD_LATIN_MAKEDICT_VER3DICTDECODER_H
diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp
index f2867d7c3..3a8f4362d 100644
--- a/native/jni/jni_common.cpp
+++ b/native/jni/jni_common.cpp
@@ -18,9 +18,12 @@
#include "jni_common.h"
+#ifndef HOST_TOOL
#include "com_android_inputmethod_keyboard_ProximityInfo.h"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include "com_android_inputmethod_latin_DicTraverseSession.h"
+#endif
+#include "com_android_inputmethod_latin_makedict_Ver3DictDecoder.h"
#include "defines.h"
/*
@@ -38,6 +41,7 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
AKLOGE("ERROR: JNIEnv is invalid");
return -1;
}
+#ifndef HOST_TOOL
if (!latinime::register_BinaryDictionary(env)) {
AKLOGE("ERROR: BinaryDictionary native registration failed");
return -1;
@@ -50,6 +54,11 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
AKLOGE("ERROR: ProximityInfo native registration failed");
return -1;
}
+#endif
+ if (!latinime::register_Ver3DictDecoder(env)) {
+ AKLOGE("ERROR: Ver3DictDecoder native registration failed");
+ return -1;
+ }
/* success -- return valid version number */
return JNI_VERSION_1_6;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
deleted file mode 100644
index 1a39f2ef3..000000000
--- a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DIC_NODE_PROXIMITY_FILTER_H
-#define LATINIME_DIC_NODE_PROXIMITY_FILTER_H
-
-#include "defines.h"
-#include "suggest/core/layout/proximity_info_state.h"
-#include "suggest/core/layout/proximity_info_utils.h"
-#include "suggest/core/policy/dictionary_structure_policy.h"
-
-namespace latinime {
-
-class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter {
- public:
- DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
- const int pointIndex, const bool exactOnly)
- : mProximityInfoState(pInfoState), mPointIndex(pointIndex), mExactOnly(exactOnly) {}
-
- bool isFilteredOut(const int codePoint) const {
- return !isProximityCodePoint(codePoint);
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeProximityFilter);
-
- const ProximityInfoState *const mProximityInfoState;
- const int mPointIndex;
- const bool mExactOnly;
-
- // TODO: Move to proximity info state
- bool isProximityCodePoint(const int codePoint) const {
- if (!mProximityInfoState) {
- return true;
- }
- if (mExactOnly) {
- return mProximityInfoState->getPrimaryCodePointAt(mPointIndex) == codePoint;
- }
- const ProximityType matchedId = mProximityInfoState->getProximityType(
- mPointIndex, codePoint, true /* checkProximityChars */);
- return ProximityInfoUtils::isMatchOrProximityChar(matchedId);
- }
-};
-} // namespace latinime
-#endif // LATINIME_DIC_NODE_PROXIMITY_FILTER_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 6b4ef2fea..bb54e608e 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -19,12 +19,10 @@
#include <cstring>
#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_proximity_filter.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h"
-#include "suggest/core/policy/dictionary_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -33,17 +31,17 @@ namespace latinime {
// Node initialization utils //
///////////////////////////////
-/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+/* static */ void DicNodeUtils::initAsRoot(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
- prevWordNodePos);
+ newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
+ prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
@@ -53,37 +51,16 @@ namespace latinime {
///////////////////////////////////
// Traverse node expansion utils //
///////////////////////////////////
-
-/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
- const DicNodeProximityFilter *const childrenFilter,
- DicNodeVector *childDicNodes) {
- // Passing multiple chars node. No need to traverse child
- const int codePoint = dicNode->getNodeTypedCodePoint();
- const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint);
- if (!childrenFilter->isFilteredOut(codePoint)
- || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
- childDicNodes->pushPassingChild(dicNode);
- }
-}
-
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) {
- getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes);
-}
-
-/* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
- const DicNodeProximityFilter childrenFilter(pInfoState, pointIndex, exactOnly);
if (!dicNode->isLeavingNode()) {
- DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
+ childDicNodes->pushPassingChild(dicNode);
} else {
- binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
- binaryDictionaryInfo, &childrenFilter, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
}
}
@@ -94,12 +71,13 @@ namespace latinime {
* Computes the combined bigram / unigram cost for the given dicNode.
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap);
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
/ static_cast<float>(MAX_PROBABILITY);
@@ -107,7 +85,7 @@ namespace latinime {
}
/* static */ int DicNodeUtils::getBigramNodeProbability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
const int unigramProbability = node->getProbability();
const int wordPos = node->getPos();
@@ -118,8 +96,8 @@ namespace latinime {
return ProbabilityUtils::backoff(unigramProbability);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(
- binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
+ wordPos, unigramProbability);
}
return ProbabilityUtils::backoff(unigramProbability);
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 4f12b29f4..3fb351a61 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -23,41 +23,37 @@
namespace latinime {
-class BinaryDictionaryInfo;
class DicNode;
-class DicNodeProximityFilter;
class DicNodeVector;
-class ProximityInfoState;
+class DictionaryStructureWithBufferPolicy;
class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
const int16_t length1, int *dest);
- static void initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static void initAsRoot(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *newRootNode);
- static void initAsRootWithPreviousWord(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static void initAsRootWithPreviousWord(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *prevWordLastNode, DicNode *newRootNode);
static void initByCopy(DicNode *srcNode, DicNode *destNode);
static void getAllChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes);
- static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const DicNode *const node, MultiBigramMap *const multiBigramMap);
- // TODO: Move to private
- static void getProximityChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
+ static float getBigramNodeImprobability(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const DicNode *const node, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
// Max number of bigrams to look up
static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
- static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static int getBigramNodeProbability(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap);
- static void createAndGetPassingChildNode(DicNode *dicNode,
- const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
index 45c7f5cf9..74eb5dfe7 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
@@ -49,8 +49,10 @@ class DicNodeStateOutput {
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
if (mergedNodeCodePoints) {
+ const int additionalCodePointCount = min(static_cast<int>(mergedNodeCodePointCount),
+ MAX_WORD_LENGTH - mOutputtedCodePointCount);
memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
- mergedNodeCodePointCount * sizeof(mCodePointsBuf[0]));
+ additionalCodePointCount * sizeof(mCodePointsBuf[0]));
mOutputtedCodePointCount = static_cast<uint16_t>(
mOutputtedCodePointCount + mergedNodeCodePointCount);
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index 5854f4f6e..f437c95f6 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -69,11 +69,14 @@ class DicNodeStatePrevWord {
const int prevWordNodePos, const int *const src0, const int16_t length0,
const int *const src1, const int16_t length1, const int *const prevSpacePositions,
const int lastInputIndex) {
- mPrevWordCount = prevWordCount;
+ mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
mPrevWordNodePos = prevWordNodePos;
- const int twoWordsLen =
+ int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
+ if (twoWordsLen >= MAX_WORD_LENGTH) {
+ twoWordsLen = MAX_WORD_LENGTH - 1;
+ }
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
mPrevWordStart = length0;
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 3751ae500..e74a1dbc8 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -22,15 +22,16 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/probability_utils.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
-BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mBinaryDictionaryInfo(binaryDictionaryInfo) {
+BigramDictionary::BigramDictionary(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
+ : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
if (DEBUG_DICT) {
AKLOGI("BigramDictionary - constructor");
}
@@ -112,13 +113,19 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int bigramCount = 0;
int unigramProbability = 0;
int bigramBuffer[MAX_WORD_LENGTH];
- BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
+ BinaryDictionaryBigramsIterator bigramsIt(
+ mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- const int length = mBinaryDictionaryInfo->getStructurePolicy()->
- getCodePointsAndProbabilityAndReturnCodePointCount(
- mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
- bigramBuffer, &unigramProbability);
+ if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
+ continue;
+ }
+ const int codePointCount = mDictionaryStructurePolicy->
+ getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
+ MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
+ if (codePointCount <= 0) {
+ continue;
+ }
// Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoritical maximum error in
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
@@ -126,8 +133,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
// here, but it can't get too bad.
const int probability = ProbabilityUtils::computeProbabilityForBigram(
unigramProbability, bigramsIt.getProbability());
- addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
- outputTypes);
+ addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability,
+ outBigramCodePoints, outputTypes);
++bigramCount;
}
return min(bigramCount, MAX_RESULTS);
@@ -138,11 +145,10 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
- mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
+ int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+ forceLowerCaseSearch);
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
- return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(
- mBinaryDictionaryInfo, pos);
+ return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
}
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@@ -150,11 +156,12 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return false;
- int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
- mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
- BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
+ BinaryDictionaryBigramsIterator bigramsIt(
+ mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) {
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 438c34cac..99b964c49 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -21,11 +21,11 @@
namespace latinime {
-class BinaryDictionaryInfo;
+class DictionaryStructureWithBufferPolicy;
class BigramDictionary {
public:
- BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+ BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
int getPredictions(const int *word, int length, int *outBigramCodePoints,
int *outBigramProbability, int *outputTypes) const;
@@ -40,7 +40,7 @@ class BigramDictionary {
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index 8cbb12998..d16ac47fe 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -18,51 +18,41 @@
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
namespace latinime {
class BinaryDictionaryBigramsIterator {
public:
BinaryDictionaryBigramsIterator(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
- mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {}
+ const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
+ : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),
+ mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
+ mHasNext(pos != NOT_A_DICT_POS) {}
AK_FORCE_INLINE bool hasNext() const {
return mHasNext;
}
AK_FORCE_INLINE void next() {
- mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
- mBinaryDictionaryInfo, &mPos);
- mBigramPos =
- BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
- mBinaryDictionaryInfo, mBigramFlags, &mPos);
- mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
+ mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos);
}
AK_FORCE_INLINE int getProbability() const {
- return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
- mBigramFlags);
+ return mProbability;
}
AK_FORCE_INLINE int getBigramPos() const {
return mBigramPos;
}
- AK_FORCE_INLINE int getFlags() const {
- return mBigramFlags;
- }
-
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy;
int mPos;
- BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
int mBigramPos;
+ int mProbability;
bool mHasNext;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
deleted file mode 100644
index 91c643a5f..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-
-namespace latinime {
-
-const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
- "MULTIPLE_WORDS_DEMOTION_RATE";
-const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
-const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
-
-BinaryDictionaryHeader::BinaryDictionaryHeader(
- const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mBinaryDictionaryInfo(binaryDictionaryInfo),
- mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)),
- mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)),
- mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
-
-float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
- const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
- mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
- if (headerValue == S_INT_MIN) {
- // not found
- return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
- }
- if (headerValue <= 0) {
- return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
- }
- return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
deleted file mode 100644
index a57b0f859..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
-
-#include <cctype>
-#include <cstdlib>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-
-namespace latinime {
-
-const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
-
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
- case HEADER_VERSION_2:
- // See the format of the header in the comment in
- // BinaryDictionaryFormatUtils::detectFormatVersion()
- return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
- + VERSION_2_HEADER_FLAG_SIZE);
- default:
- return S_INT_MAX;
- }
-}
-
-/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::getFlags(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
- case HEADER_VERSION_2:
- return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
- default:
- return NO_FLAGS;
- }
-}
-
-// Returns if the key is found or not and reads the found value into outValue.
-/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const char *const key, int *outValue, const int outValueSize) {
- if (outValueSize <= 0) {
- return false;
- }
- const int headerSize = getHeaderSize(binaryDictionaryInfo);
- int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
- if (pos == NOT_A_DICT_POS) {
- // The header doesn't have header options.
- return false;
- }
- while (pos < headerSize) {
- if(ByteArrayUtils::compareStringInBufferWithCharArray(
- binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
- // The key was found.
- const int length = ByteArrayUtils::readStringAndAdvancePosition(
- binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
- // Add a 0 terminator to the string.
- outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
- return true;
- }
- ByteArrayUtils::advancePositionToBehindString(
- binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos);
- }
- // The key was not found.
- return false;
-}
-
-/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) {
- const int bufferSize = LARGEST_INT_DIGIT_COUNT;
- int intBuffer[bufferSize];
- char charBuffer[bufferSize];
- if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) {
- return S_INT_MIN;
- }
- for (int i = 0; i < bufferSize; ++i) {
- charBuffer[i] = intBuffer[i];
- if (charBuffer[i] == '0') {
- break;
- }
- if (!isdigit(charBuffer[i])) {
- // If not a number, return S_INT_MIN
- return S_INT_MIN;
- }
- }
- return atoi(charBuffer);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
deleted file mode 100644
index 61748227e..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H
-#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryInfo;
-
-class BinaryDictionaryHeaderReadingUtils {
- public:
- typedef uint16_t DictionaryFlags;
-
- static const int MAX_OPTION_KEY_LENGTH;
-
- static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo);
-
- static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo);
-
- static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
- return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
- return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
- return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
- switch (getHeaderVersion(dictionaryFormat)) {
- case HEADER_VERSION_2:
- return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
- + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
- break;
- default:
- return NOT_A_DICT_POS;
- }
- }
-
- static bool readHeaderValue(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const char *const key, int *outValue, const int outValueSize);
-
- static int readHeaderValueInt(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
-
- enum HEADER_VERSION {
- HEADER_VERSION_2,
- UNKNOWN_HEADER_VERSION
- };
-
- static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
- static const int VERSION_2_HEADER_FLAG_SIZE;
- static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
-
- static const DictionaryFlags NO_FLAGS;
- // Flags for special processing
- // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
- // something very bad (like, the apocalypse) will happen. Please update both at the same time.
- static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
- static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
- static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
- static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
-
- static HEADER_VERSION getHeaderVersion(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
- switch(formatVersion) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- // Fall through
- case BinaryDictionaryFormatUtils::VERSION_3:
- return HEADER_VERSION_2;
- default:
- return UNKNOWN_HEADER_VERSION;
- }
- }
-};
-}
-#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
deleted file mode 100644
index cbea18f90..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BINARY_DICTIONARY_INFO_H
-#define LATINIME_BINARY_DICTIONARY_INFO_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "jni.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
-#include "utils/log_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryInfo {
- public:
- AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
- const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
- : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
- mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
- mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
- mDictBuf, mDictSize)),
- mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
- mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
- mDictionaryFormat)) {
- logDictionaryInfo(env);
- }
-
- AK_FORCE_INLINE const uint8_t *getDictBuf() const {
- return mDictBuf;
- }
-
- AK_FORCE_INLINE int getDictSize() const {
- return mDictSize;
- }
-
- AK_FORCE_INLINE int getMmapFd() const {
- return mMmapFd;
- }
-
- AK_FORCE_INLINE int getDictBufOffset() const {
- return mDictBufOffset;
- }
-
- AK_FORCE_INLINE const uint8_t *getDictRoot() const {
- return mDictRoot;
- }
-
- AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const {
- return mDictionaryFormat;
- }
-
- AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
- return &mDictionaryHeader;
- }
-
- AK_FORCE_INLINE bool isDynamicallyUpdatable() const {
- // TODO: Support dynamic dictionary formats.
- const bool isUpdatableDictionaryFormat = false;
- return mIsUpdatable && isUpdatableDictionaryFormat;
- }
-
- AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
- return mStructurePolicy;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
-
- const uint8_t *const mDictBuf;
- const int mDictSize;
- const int mMmapFd;
- const int mDictBufOffset;
- const bool mIsUpdatable;
- const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
- const BinaryDictionaryHeader mDictionaryHeader;
- const uint8_t *const mDictRoot;
- const DictionaryStructurePolicy *const mStructurePolicy;
-
- AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
- const int BUFFER_SIZE = 16;
- int dictionaryIdCodePointBuffer[BUFFER_SIZE];
- int versionStringCodePointBuffer[BUFFER_SIZE];
- int dateStringCodePointBuffer[BUFFER_SIZE];
- mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary",
- dictionaryIdCodePointBuffer, BUFFER_SIZE);
- mDictionaryHeader.readHeaderValueOrQuestionMark("version",
- versionStringCodePointBuffer, BUFFER_SIZE);
- mDictionaryHeader.readHeaderValueOrQuestionMark("date",
- dateStringCodePointBuffer, BUFFER_SIZE);
-
- char dictionaryIdCharBuffer[BUFFER_SIZE];
- char versionStringCharBuffer[BUFFER_SIZE];
- char dateStringCharBuffer[BUFFER_SIZE];
- intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
- dictionaryIdCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
- versionStringCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
- dateStringCharBuffer, BUFFER_SIZE);
-
- LogUtils::logToJava(env,
- "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
- dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize);
- }
-};
-}
-#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
new file mode 100644
index 000000000..558e0a5c3
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+#define LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator {
+ public:
+ BinaryDictionaryShortcutIterator(
+ const DictionaryShortcutsStructurePolicy *const shortcutStructurePolicy,
+ const int shortcutPos)
+ : mShortcutStructurePolicy(shortcutStructurePolicy),
+ mPos(shortcutStructurePolicy->getStartPos(shortcutPos)),
+ mHasNextShortcutTarget(shortcutPos != NOT_A_DICT_POS) {}
+
+ AK_FORCE_INLINE bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as an int string and put it to outTarget, put its length
+ // to outTargetLength, put whether it is whitelist to outIsWhitelist.
+ AK_FORCE_INLINE void nextShortcutTarget(
+ const int maxDepth, int *const outTarget, int *const outTargetLength,
+ bool *const outIsWhitelist) {
+ mShortcutStructurePolicy->getNextShortcut(maxDepth, outTarget, outTargetLength,
+ outIsWhitelist, &mHasNextShortcutTarget, &mPos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryShortcutIterator);
+
+ const DictionaryShortcutsStructurePolicy *const mShortcutStructurePolicy;
+ int mPos;
+ bool mHasNextShortcutTarget;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
deleted file mode 100644
index 20b77b3b2..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils;
-
-const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-// Flag for presence of more attributes
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
-// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
-// The numeric value of the shortcut probability that means 'whitelist'.
-const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
-
-/* static */ int TaUtils::getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
- int *const pos) {
- int offset = 0;
- const int origin = *pos;
- switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
- offset = ByteArrayUtils::readUint8AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
- offset = ByteArrayUtils::readUint16AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
- offset = ByteArrayUtils::readUint24AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- break;
- }
- if (isOffsetNegative(flags)) {
- return origin - offset;
- } else {
- return origin + offset;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
deleted file mode 100644
index 375fc7dff..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryTerminalAttributesReadingUtils {
- public:
- typedef uint8_t TerminalAttributeFlags;
- typedef TerminalAttributeFlags BigramFlags;
- typedef TerminalAttributeFlags ShortcutFlags;
-
- static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- }
-
- static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
- return flags & MASK_ATTRIBUTE_PROBABILITY;
- }
-
- static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) {
- return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
- }
-
- // Bigrams reading methods
- static AK_FORCE_INLINE void skipExistingBigrams(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
- while (hasNext(flags)) {
- *pos += attributeAddressSize(flags);
- flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
- }
- *pos += attributeAddressSize(flags);
- }
-
- static int getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
- int *const pos);
-
- // Shortcuts reading methods
- // This method returns the size of the shortcut list region excluding the shortcut list size
- // field at the beginning.
- static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
- return ByteArrayUtils::readUint16AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
- }
-
- static AK_FORCE_INLINE void skipShortcuts(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- const int shortcutListSize = getShortcutListSizeAndForwardPointer(
- binaryDictionaryInfo, pos);
- *pos += shortcutListSize;
- }
-
- static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
- return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
- }
-
- static AK_FORCE_INLINE int readShortcutTarget(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
- int *const outWord, int *const pos) {
- return ByteArrayUtils::readStringAndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
-
- static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
- static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
- static const int ATTRIBUTE_ADDRESS_SHIFT;
- static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
- static const int WHITELIST_SHORTCUT_PROBABILITY;
-
- static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
- return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
- }
-
- static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) {
- return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
- /* Note: this is a value-dependant optimization of what may probably be
- more readably written this way:
- switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
- default: return 0;
- }
- */
- }
-};
-}
-#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 4a9e38fe8..8418a608a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -18,33 +18,35 @@
#include "suggest/core/dictionary/dictionary.h"
-#include <map> // TODO: remove
#include <stdint.h>
#include "defines.h"
-#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
+#include "utils/log_utils.h"
namespace latinime {
-Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
- int dictBufOffset, bool isUpdatable)
- : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
- dictBufOffset, isUpdatable),
- mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
+Dictionary::Dictionary(JNIEnv *env,
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
+ : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
+ logDictionaryInfo(env);
}
Dictionary::~Dictionary() {
delete mBigramDictionary;
delete mGestureSuggest;
delete mTypingSuggest;
+ delete mDictionaryStructureWithBufferPolicy;
}
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
@@ -83,14 +85,12 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
}
int Dictionary::getProbability(const int *word, int length) const {
- const DictionaryStructurePolicy *const structurePolicy =
- mBinaryDictionaryInfo.getStructurePolicy();
- int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
+ int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == pos) {
return NOT_A_PROBABILITY;
}
- return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
+ return getDictionaryStructurePolicy()->getUnigramProbability(pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
@@ -98,32 +98,46 @@ bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1,
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
- if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
- // This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: Dictionary::addUnigramWord() is called for non-updatable dictionary.");
- return;
- }
- // TODO: Support dynamic update
+ mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
- if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
- // This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: Dictionary::addBigramWords() is called for non-updatable dictionary.");
- return;
- }
- // TODO: Support dynamic update
+ mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
+ probability);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
- // This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: Dictionary::removeBigramWords() is called for non-updatable dictionary.");
- return;
- }
- // TODO: Support dynamic update
+ mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
+}
+
+void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
+ const int BUFFER_SIZE = 16;
+ int dictionaryIdCodePointBuffer[BUFFER_SIZE];
+ int versionStringCodePointBuffer[BUFFER_SIZE];
+ int dateStringCodePointBuffer[BUFFER_SIZE];
+ const DictionaryHeaderStructurePolicy *const headerPolicy =
+ getDictionaryStructurePolicy()->getHeaderStructurePolicy();
+ headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
+ BUFFER_SIZE);
+ headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
+ BUFFER_SIZE);
+ headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE);
+
+ char dictionaryIdCharBuffer[BUFFER_SIZE];
+ char versionStringCharBuffer[BUFFER_SIZE];
+ char dateStringCharBuffer[BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
+ dictionaryIdCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
+ versionStringCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
+ dateStringCharBuffer, BUFFER_SIZE);
+
+ LogUtils::logToJava(env,
+ "Dictionary info: dictionary = %s ; version = %s ; date = %s",
+ dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 9f1e0729d..0afe5a54b 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,11 +21,11 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime {
class BigramDictionary;
+class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
class SuggestInterface;
@@ -53,8 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
- bool isUpdatable);
+ Dictionary(JNIEnv *env,
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@@ -77,8 +77,8 @@ class Dictionary {
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
- const BinaryDictionaryInfo *getBinaryDictionaryInfo() const {
- return &mBinaryDictionaryInfo;
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
+ return mDictionaryStructureWithBufferPolicy;
}
virtual ~Dictionary();
@@ -86,10 +86,12 @@ class Dictionary {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
- const BinaryDictionaryInfo mBinaryDictionaryInfo;
- const BigramDictionary *mBigramDictionary;
- SuggestInterface *mGestureSuggest;
- SuggestInterface *mTypingSuggest;
+ DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
+ const BigramDictionary *const mBigramDictionary;
+ const SuggestInterface *const mGestureSuggest;
+ const SuggestInterface *const mTypingSuggest;
+
+ void logDictionaryInfo(JNIEnv *const env) const;
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index af378b1b7..3271c1bfb 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -19,7 +19,7 @@
#include <cstdlib>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -35,8 +35,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
{ DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
- const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) {
- const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header);
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
+ const int compositeGlyphCodePoint) {
+ const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy);
if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
return true;
}
@@ -45,11 +46,11 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
// Returns the digraph type associated with the given dictionary.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
- const BinaryDictionaryHeader *const header) {
- if (header->requiresGermanUmlautProcessing()) {
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ if (headerPolicy->requiresGermanUmlautProcessing()) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
- if (header->requiresFrenchLigatureProcessing()) {
+ if (headerPolicy->requiresFrenchLigatureProcessing()) {
return DIGRAPH_TYPE_FRENCH_LIGATURES;
}
return DIGRAPH_TYPE_NONE;
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index 9d74fe3a6..6ae16e390 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -21,7 +21,7 @@
namespace latinime {
-class BinaryDictionaryHeader;
+class DictionaryHeaderStructurePolicy;
class DigraphUtils {
public:
@@ -39,14 +39,15 @@ class DigraphUtils {
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
- static bool hasDigraphForCodePoint(
- const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint);
+ static bool hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy *const headerPolicy,
+ const int compositeGlyphCodePoint);
static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
const DigraphCodePointIndex digraphCodePointIndex);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
- static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header);
+ static DigraphType getDigraphTypeForDictionary(
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
static int getAllDigraphsForDigraphTypeAndReturnSize(
const DigraphType digraphType, const digraph_t **const digraphs);
static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint);
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index d5eafe1bf..fb4a80083 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -21,9 +21,9 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/probability_utils.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/hash_map_compat.h"
namespace latinime {
@@ -38,7 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
- int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
@@ -46,12 +46,12 @@ class MultiBigramMap {
return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(binaryDictionaryInfo, wordPosition);
+ addBigramsForWordPosition(structurePolicy, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability(
nextWordPosition, unigramProbability);
}
- return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo,
- wordPosition, nextWordPosition, unigramProbability);
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ nextWordPosition, unigramProbability);
}
void clear() {
@@ -66,12 +66,16 @@ class MultiBigramMap {
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
~BigramMap() {}
- void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
- const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
- getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
+ void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int nodePos) {
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
+ continue;
+ }
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
}
@@ -100,16 +104,16 @@ class MultiBigramMap {
};
AK_FORCE_INLINE void addBigramsForWordPosition(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) {
- mBigramMaps[position].init(binaryDictionaryInfo, position);
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
+ mBigramMaps[position].init(structurePolicy, position);
}
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
- const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
- getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) {
diff --git a/native/jni/src/suggest/core/dictionary/probability_utils.h b/native/jni/src/suggest/core/dictionary/probability_utils.h
index f450087d8..21fe355b8 100644
--- a/native/jni/src/suggest/core/dictionary/probability_utils.h
+++ b/native/jni/src/suggest/core/dictionary/probability_utils.h
@@ -41,7 +41,7 @@ class ProbabilityUtils {
// the unigram probability to be the median value of the 17th step from the top. A value of
// 0 for the bigram probability represents the middle of the 16th step from the top,
// while a value of 15 represents the middle of the top step.
- // See makedict.BinaryDictInputOutput for details.
+ // See makedict.BinaryDictEncoder#makeBigramFlags for details.
const float stepSize = static_cast<float>(MAX_PROBABILITY - unigramProbability)
/ (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY);
return unigramProbability
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 3c2180937..461d7b454 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -19,21 +19,20 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/terminal_attributes.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
namespace latinime {
class ShortcutUtils {
public:
- static int outputShortcuts(const TerminalAttributes *const terminalAttributes,
+ static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
int outputWordIndex, const int finalScore, int *const outputCodePoints,
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
- TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
int shortcutTarget[MAX_WORD_LENGTH];
- while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
+ while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
bool isWhilelist;
int shortcutTargetStringLength;
- iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+ shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetStringLength, &isWhilelist);
int shortcutScore;
int kind;
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
deleted file mode 100644
index 0da6504eb..000000000
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
-#define LATINIME_TERMINAL_ATTRIBUTES_H
-
-#include <stdint.h>
-
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-
-namespace latinime {
-
-/**
- * This class encapsulates information about a terminal that allows to
- * retrieve local node attributes like the list of shortcuts without
- * exposing the format structure to the client.
- */
-class TerminalAttributes {
- public:
- class ShortcutIterator {
- public:
- ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int shortcutPos, const bool hasShortcutList)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
- mHasNextShortcutTarget(hasShortcutList) {}
-
- inline bool hasNextShortcutTarget() const {
- return mHasNextShortcutTarget;
- }
-
- // Gets the shortcut target itself as an int string and put it to outTarget, put its length
- // to outTargetLength, put whether it is whitelist to outIsWhitelist.
- AK_FORCE_INLINE void nextShortcutTarget(
- const int maxDepth, int *const outTarget, int *const outTargetLength,
- bool *const outIsWhitelist) {
- const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
- BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
- mBinaryDictionaryInfo, &mPos);
- mHasNextShortcutTarget =
- BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
- if (outIsWhitelist) {
- *outIsWhitelist =
- BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
- }
- if (outTargetLength) {
- *outTargetLength =
- BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
- mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
- }
- }
-
- private:
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- int mPos;
- bool mHasNextShortcutTarget;
- };
-
- TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int shortcutPos)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {}
-
- inline ShortcutIterator getShortcutIterator() const {
- int shortcutPos = mShortcutListSizePos;
- const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS;
- if (hasShortcutList) {
- BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
- mBinaryDictionaryInfo, &shortcutPos);
- }
- // shortcutPos is never used if hasShortcutList is false.
- return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const int mShortcutListSizePos;
-};
-} // namespace latinime
-#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h
new file mode 100644
index 000000000..661ef1b1a
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts structure of bigrams.
+ */
+class DictionaryBigramsStructurePolicy {
+ public:
+ virtual ~DictionaryBigramsStructurePolicy() {}
+
+ virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const pos) const = 0;
+ virtual void skipAllBigrams(int *const pos) const = 0;
+
+ protected:
+ DictionaryBigramsStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
new file mode 100644
index 000000000..a6829b476
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts structure of dictionaries.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryHeaderStructurePolicy {
+ public:
+ virtual ~DictionaryHeaderStructurePolicy() {}
+
+ virtual bool supportsDynamicUpdate() const = 0;
+
+ virtual bool requiresGermanUmlautProcessing() const = 0;
+
+ virtual bool requiresFrenchLigatureProcessing() const = 0;
+
+ virtual float getMultiWordCostMultiplier() const = 0;
+
+ virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+ int outValueSize) const = 0;
+
+ protected:
+ DictionaryHeaderStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryHeaderStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h
new file mode 100644
index 000000000..40b6c2de1
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts structure of shortcuts.
+ */
+class DictionaryShortcutsStructurePolicy {
+ public:
+ virtual ~DictionaryShortcutsStructurePolicy() {}
+
+ virtual int getStartPos(const int pos) const = 0;
+
+ virtual void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const = 0;
+
+ virtual void skipAllShortcuts(int *const pos) const = 0;
+
+ protected:
+ DictionaryShortcutsStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryShortcutsStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_policy.h
deleted file mode 100644
index cc14c982c..000000000
--- a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
-#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class BinaryDictionaryInfo;
-class DicNode;
-class DicNodeVector;
-
-/*
- * This class abstracts structure of dictionaries.
- * Implement this policy to support additional dictionaries.
- */
-class DictionaryStructurePolicy {
- public:
- // This provides a filtering method for filtering new node.
- class NodeFilter {
- public:
- virtual bool isFilteredOut(const int codePoint) const = 0;
-
- protected:
- NodeFilter() {}
- virtual ~NodeFilter() {}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(NodeFilter);
- };
-
- virtual int getRootPosition() const = 0;
-
- virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
-
- virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const = 0;
-
- virtual int getTerminalNodePositionOfWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const = 0;
-
- virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const = 0;
-
- virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const = 0;
-
- virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const = 0;
-
- protected:
- DictionaryStructurePolicy() {}
- virtual ~DictionaryStructurePolicy() {}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy);
-};
-} // namespace latinime
-#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
new file mode 100644
index 000000000..532411509
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+class DictionaryBigramsStructurePolicy;
+class DictionaryHeaderStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class abstracts structure of dictionaries.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryStructureWithBufferPolicy {
+ public:
+ virtual ~DictionaryStructureWithBufferPolicy() {}
+
+ virtual int getRootPosition() const = 0;
+
+ virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const = 0;
+
+ virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int nodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const = 0;
+
+ virtual int getTerminalNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const = 0;
+
+ virtual int getUnigramProbability(const int nodePos) const = 0;
+
+ virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
+
+ virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
+
+ virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
+
+ virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
+
+ virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
+
+ // Returns whether the update was success or not.
+ virtual bool addUnigramWord(const int *const word, const int length,
+ const int probability) = 0;
+
+ // Returns whether the update was success or not.
+ virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability) = 0;
+
+ // Returns whether the update was success or not.
+ virtual bool removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) = 0;
+
+ protected:
+ DictionaryStructureWithBufferPolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 58729229f..f9b777df2 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -148,7 +148,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
case CT_TERMINAL: {
const float languageImprobability =
DicNodeUtils::getBigramNodeImprobability(
- traverseSession->getBinaryDictionaryInfo(), dicNode, multiBigramMap);
+ traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap);
return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability);
}
case CT_TERMINAL_INSERTION:
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 7651b19a0..0ca583f90 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -17,32 +17,30 @@
#include "suggest/core/session/dic_traverse_session.h"
#include "defines.h"
-#include "jni.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
- const BinaryDictionaryInfo *const binaryDictionaryInfo =
- mDictionary->getBinaryDictionaryInfo();
- mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier();
+ mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
+ ->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
mPrevWordPos = NOT_A_VALID_WORD_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
- binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */);
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ prevWord, prevWordLength, false /* forceLowerCaseSearch */);
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
- binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */);
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
@@ -56,8 +54,9 @@ void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
maxSpatialDistance, maxPointerCount);
}
-const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const {
- return mDictionary->getBinaryDictionaryInfo();
+const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy()
+ const {
+ return mDictionary->getDictionaryStructurePolicy();
}
void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) {
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index de57e041a..23de5cc65 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -28,8 +28,8 @@
namespace latinime {
-class BinaryDictionaryInfo;
class Dictionary;
+class DictionaryStructureWithBufferPolicy;
class ProximityInfo;
class SuggestOptions;
@@ -75,8 +75,7 @@ class DicTraverseSession {
const int maxPointerCount);
void resetCache(const int nextActiveCacheSize, const int maxWords);
- // TODO: Remove
- const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const;
//--------------------
// getters and setters
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 9376d7b93..7d8dd21c5 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -19,12 +19,12 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/shortcut_utils.h"
-#include "suggest/core/dictionary/terminal_attributes.h"
#include "suggest/core/layout/proximity_info.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
@@ -107,7 +107,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
MAX_RESULTS);
// Create a new dic node here
DicNode rootNode;
- DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(),
+ DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
traverseSession->getPrevWordPos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
@@ -211,15 +211,14 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
}
if (!terminalDicNode->hasMultipleWords()) {
- const BinaryDictionaryInfo *const binaryDictionaryInfo =
- traverseSession->getBinaryDictionaryInfo();
- const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
- binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
- binaryDictionaryInfo, terminalDicNode->getPos()));
+ BinaryDictionaryShortcutIterator shortcutIt(
+ traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getShortcutPositionOfNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
+ outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex,
finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
}
DicNode::managedDelete(terminalDicNode);
@@ -298,7 +297,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
DicNodeUtils::getAllChildDicNodes(
- &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes);
+ &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
const int childDicNodesSize = childDicNodes.getSizeAndLock();
for (int i = 0; i < childDicNodesSize; ++i) {
@@ -309,7 +308,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
continue;
}
if (DigraphUtils::hasDigraphForCodePoint(
- traverseSession->getBinaryDictionaryInfo()->getHeader(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getHeaderStructurePolicy(),
childDicNode->getNodeCodePoint())) {
correctionDicNode.initByCopy(childDicNode);
correctionDicNode.advanceDigraphIndex();
@@ -447,7 +447,7 @@ void Suggest::processDicNodeAsOmission(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
DicNodeVector childDicNodes;
DicNodeUtils::getAllChildDicNodes(
- dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes);
+ dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
const int size = childDicNodes.getSizeAndLock();
for (int i = 0; i < size; i++) {
@@ -456,7 +456,6 @@ void Suggest::processDicNodeAsOmission(
Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
dicNode, childDicNode, 0 /* multiBigramMap */);
weightChildNode(traverseSession, childDicNode);
-
if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
continue;
}
@@ -472,10 +471,14 @@ void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
DicNode *dicNode) const {
const int16_t pointIndex = dicNode->getInputIndex(0);
DicNodeVector childDicNodes;
- DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(),
- traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes);
+ DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
+ &childDicNodes);
const int size = childDicNodes.getSizeAndLock();
for (int i = 0; i < size; i++) {
+ if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
+ != childDicNodes[i]->getNodeCodePoint()) {
+ continue;
+ }
DicNode *const childDicNode = childDicNodes[i];
Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
dicNode, childDicNode, 0 /* multiBigramMap */);
@@ -490,18 +493,29 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
DicNode *dicNode) const {
const int16_t pointIndex = dicNode->getInputIndex(0);
DicNodeVector childDicNodes1;
- DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(),
- traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1);
+ DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
+ &childDicNodes1);
const int childSize1 = childDicNodes1.getSizeAndLock();
for (int i = 0; i < childSize1; i++) {
+ const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
+ ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
+ true /* checkProximityChars */);
+ if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
+ continue;
+ }
if (childDicNodes1[i]->hasChildren()) {
DicNodeVector childDicNodes2;
- DicNodeUtils::getProximityChildDicNodes(
- childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(),
- traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2);
+ DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
+ traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
const int childSize2 = childDicNodes2.getSizeAndLock();
for (int j = 0; j < childSize2; j++) {
DicNode *const childDicNode2 = childDicNodes2[j];
+ const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
+ ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
+ true /* checkProximityChars */);
+ if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
+ continue;
+ }
Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
processExpandedDicNode(traverseSession, childDicNode2);
@@ -538,7 +552,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode
// Create a non-cached node here.
DicNode newDicNode;
DicNodeUtils::initAsRootWithPreviousWord(
- traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode);
+ traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
const CorrectionType correctionType = spaceSubstitution ?
CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION;
Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
new file mode 100644
index 000000000..26015d512
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_POLICY_H
+#define LATINIME_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+
+namespace latinime {
+
+class BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
+
+ ~BigramListPolicy() {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+ int *const pos) const {
+ const BigramListReadWriteUtils::BigramFlags flags =
+ BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
+ *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ mBigramsBuf, flags, pos);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ }
+
+ void skipAllBigrams(int *const pos) const {
+ BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
+
+ const uint8_t *const mBigramsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
new file mode 100644
index 000000000..d57547445
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
+ 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
+ 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+
+/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
+ int offset = 0;
+ const int origin = *pos;
+ switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
+ break;
+ }
+ if (offset == 0) {
+ return NOT_A_VALID_WORD_POS;
+ }
+ if (isOffsetNegative(flags)) {
+ return origin - offset;
+ } else {
+ return origin + offset;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
new file mode 100644
index 000000000..ee2b722a4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+#define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+
+#include <cstdlib>
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+class BigramListReadWriteUtils {
+public:
+ typedef uint8_t BigramFlags;
+
+ static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(
+ const uint8_t *const bigramsBuf, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+ }
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // Bigrams reading methods
+ static AK_FORCE_INLINE void skipExistingBigrams(const uint8_t *const bigramsBuf,
+ int *const pos) {
+ BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
+ while (hasNext(flags)) {
+ *pos += attributeAddressSize(flags);
+ flags = getFlagsAndForwardPointer(bigramsBuf, pos);
+ }
+ *pos += attributeAddressSize(flags);
+ }
+
+ static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
+ const BigramFlags flags, int *const pos);
+
+ // Returns the size of the bigram position field that is stored in bigram flags.
+ static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+ return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+ /* Note: this is a value-dependant optimization of what may probably be
+ more readably written this way:
+ switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
+ default: return 0;
+ }
+ */
+ }
+
+ static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) {
+ return flags | FLAG_ATTRIBUTE_HAS_NEXT;
+ }
+
+ // Returns true if the bigram entry is valid and put entry values into out*.
+ static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
+ const int entryPos, const int targetPos, const int probability, const bool hasNext,
+ BigramFlags *const outBigramFlags, uint32_t *const outOffset,
+ int *const outOffsetFieldSize) {
+ if (targetPos == NOT_A_VALID_WORD_POS) {
+ return false;
+ }
+ BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
+ if (hasNext) {
+ flags |= FLAG_ATTRIBUTE_HAS_NEXT;
+ }
+ const int targetFieldPos = entryPos + 1;
+ const int offset = targetPos - targetFieldPos;
+ if (offset < 0) {
+ flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ }
+ const uint32_t absOffest = abs(offset);
+ if ((absOffest >> 24) != 0) {
+ // Offset is too large.
+ return false;
+ } else if ((absOffest >> 16) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ *outOffsetFieldSize = 3;
+ } else if ((absOffest >> 8) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ *outOffsetFieldSize = 2;
+ } else {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ *outOffsetFieldSize = 1;
+ }
+ *outBigramFlags = flags;
+ *outOffset = absOffest;
+ return true;
+ }
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
+
+ static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+ static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+ }
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
new file mode 100644
index 000000000..e31a91069
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+
+namespace latinime {
+
+bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *fromPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags flags;
+ do {
+ flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
+ int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, fromPos);
+ if (bigramPos == NOT_A_VALID_WORD_POS) {
+ // skip invalid bigram entry.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ bigramPos += mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags newBigramFlags;
+ uint32_t newBigramOffset;
+ int newBigramOffsetFieldSize;
+ if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
+ *toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
+ BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
+ &newBigramOffsetFieldSize)) {
+ continue;
+ }
+ // Write bigram entry. Target buffer is always the additional buffer.
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
+ return false;
+ }
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
+ toPos)) {
+ return false;
+ }
+ } while(BigramListReadWriteUtils::hasNext(flags));
+ if (usesAdditionalBuffer) {
+ *fromPos += mBuffer->getOriginalBufferSize();
+ }
+ return true;
+}
+
+bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability,
+ int *const pos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags flags;
+ do {
+ int entryPos = *pos;
+ if (usesAdditionalBuffer) {
+ entryPos += mBuffer->getOriginalBufferSize();
+ }
+ flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
+ BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos);
+ if (BigramListReadWriteUtils::hasNext(flags)) {
+ continue;
+ }
+ // The current last entry is found.
+ // First, update the flags of the last entry.
+ const BigramListReadWriteUtils::BigramFlags updatedFlags =
+ BigramListReadWriteUtils::setHasNextFlag(flags);
+ if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
+ return false;
+ }
+ // Then, add a new entry after the last entry.
+ BigramListReadWriteUtils::BigramFlags newBigramFlags;
+ uint32_t newBigramOffset;
+ int newBigramOffsetFieldSize;
+ if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
+ *pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
+ BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
+ &newBigramOffsetFieldSize)) {
+ continue;
+ }
+ int newEntryPos = *pos;
+ if (usesAdditionalBuffer) {
+ newEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ // Write bigram flags.
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,
+ &newEntryPos)) {
+ return false;
+ }
+ // Write bigram positon offset.
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
+ &newEntryPos)) {
+ return false;
+ }
+ } while(BigramListReadWriteUtils::hasNext(flags));
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ return true;
+}
+
+bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = bigramListPos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags flags;
+ do {
+ flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
+ int bigramOffsetFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, &pos);
+ if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
+ bigramPos += mBuffer->getOriginalBufferSize();
+ }
+ if (bigramPos != targetBigramPos) {
+ continue;
+ }
+ // Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
+ const int bigramOffsetFieldSize =
+ BigramListReadWriteUtils::attributeAddressSize(flags);
+ if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
+ &bigramOffsetFieldPos)) {
+ return false;
+ }
+ return true;
+ } while(BigramListReadWriteUtils::hasNext(flags));
+ return false;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
new file mode 100644
index 000000000..b7c05376d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of BigramListPolicy and supports an additional buffer.
+ */
+class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
+ : mBuffer(buffer) {}
+
+ ~DynamicBigramListPolicy() {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+ int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const BigramListReadWriteUtils::BigramFlags flags =
+ BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
+ *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, pos);
+ if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
+ *outBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ void skipAllBigrams(int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
+ // positions after bigram lists. This method skips invalid bigram entries.
+ bool copyAllBigrams(int *const fromPos, int *const toPos);
+
+ bool addBigramEntry(const int bigramPos, const int probability, int *const pos);
+
+ // Return if targetBigramPos is found or not.
+ bool removeBigram(const int bigramListPos, const int targetBigramPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+
+ BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h b/native/jni/src/suggest/policyimpl/dictionary/binary_format.h
deleted file mode 100644
index 23f4c7fec..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BINARY_FORMAT_H
-#define LATINIME_BINARY_FORMAT_H
-
-#include <stdint.h>
-
-#include "suggest/core/dictionary/probability_utils.h"
-#include "utils/char_utils.h"
-
-namespace latinime {
-
-class BinaryFormat {
- public:
- // Mask and flags for children address type selection.
- static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
-
- // Flag for single/multiple char group
- static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
-
- // Flag for terminal groups
- static const int FLAG_IS_TERMINAL = 0x10;
-
- // Flag for shortcut targets presence
- static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
- // Flag for bigram presence
- static const int FLAG_HAS_BIGRAMS = 0x04;
- // Flag for non-words (typically, shortcut only entries)
- static const int FLAG_IS_NOT_A_WORD = 0x02;
- // Flag for blacklist
- static const int FLAG_IS_BLACKLISTED = 0x01;
-
- // Attribute (bigram/shortcut) related flags:
- // Flag for presence of more attributes
- static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
- // Flag for sign of offset. If this flag is set, the offset value must be negated.
- static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-
- // Mask for attribute probability, stored on 4 bits inside the flags byte.
- static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-
- // Mask and flags for attribute address type selection.
- static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-
- static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
- static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
- static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
- static int readProbabilityWithoutMovingPointer(const uint8_t *const dict, const int pos);
- static int skipOtherCharacters(const uint8_t *const dict, const int pos);
- static int skipChildrenPosition(const uint8_t flags, const int pos);
- static int skipProbability(const uint8_t flags, const int pos);
- static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
- static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
- const int pos);
- static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
- static bool hasChildrenInFlags(const uint8_t flags);
- static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
- const int length, const bool forceLowerCaseSearch);
- static int getCodePointsAndProbabilityAndReturnCodePointCount(
- const uint8_t *const root, const int nodePos, const int maxCodePointCount,
- int *const outCodePoints, int *const outUnigramProbability);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
-
- static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
- static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
- static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
- static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-
- static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
- static const int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
- static const int CHARACTER_ARRAY_TERMINATOR = 0x1F;
- static const int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
- static const int NO_FLAGS = 0;
- static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
- static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
-};
-
-AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
- int *pos) {
- const int msb = dict[(*pos)++];
- if (msb < 0x80) return msb;
- return ((msb & 0x7F) << 8) | dict[(*pos)++];
-}
-
-inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) {
- return dict[(*pos)++];
-}
-
-AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict,
- int *pos) {
- const int origin = *pos;
- const int codePoint = dict[origin];
- if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
- if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
- *pos = origin + 1;
- return NOT_A_CODE_POINT;
- } else {
- *pos = origin + 3;
- const int char_1 = codePoint << 16;
- const int char_2 = char_1 + (dict[origin + 1] << 8);
- return char_2 + dict[origin + 2];
- }
- } else {
- *pos = origin + 1;
- return codePoint;
- }
-}
-
-inline int BinaryFormat::readProbabilityWithoutMovingPointer(const uint8_t *const dict,
- const int pos) {
- return dict[pos];
-}
-
-AK_FORCE_INLINE int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
- int currentPos = pos;
- int character = dict[currentPos++];
- while (CHARACTER_ARRAY_TERMINATOR != character) {
- if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
- currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
- }
- character = dict[currentPos++];
- }
- return currentPos;
-}
-
-static inline int attributeAddressSize(const uint8_t flags) {
- static const int ATTRIBUTE_ADDRESS_SHIFT = 4;
- return (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
- /* Note: this is a value-dependant optimization of what may probably be
- more readably written this way:
- switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
- default: return 0;
- }
- */
-}
-
-static AK_FORCE_INLINE int skipExistingBigrams(const uint8_t *const dict, const int pos) {
- int currentPos = pos;
- uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
- while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
- currentPos += attributeAddressSize(flags);
- flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
- }
- currentPos += attributeAddressSize(flags);
- return currentPos;
-}
-
-static inline int childrenAddressSize(const uint8_t flags) {
- static const int CHILDREN_ADDRESS_SHIFT = 6;
- return (BinaryFormat::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
- /* See the note in attributeAddressSize. The same applies here */
-}
-
-static AK_FORCE_INLINE int shortcutByteSize(const uint8_t *const dict, const int pos) {
- return (static_cast<int>(dict[pos] << 8)) + (dict[pos + 1]);
-}
-
-inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) {
- return pos + childrenAddressSize(flags);
-}
-
-inline int BinaryFormat::skipProbability(const uint8_t flags, const int pos) {
- return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
-}
-
-AK_FORCE_INLINE int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
- const int pos) {
- if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
- return pos + shortcutByteSize(dict, pos);
- } else {
- return pos;
- }
-}
-
-AK_FORCE_INLINE int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
- const int pos) {
- if (FLAG_HAS_BIGRAMS & flags) {
- return skipExistingBigrams(dict, pos);
- } else {
- return pos;
- }
-}
-
-AK_FORCE_INLINE int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
- const int pos) {
- // This function skips all attributes: shortcuts and bigrams.
- int newPos = pos;
- newPos = skipShortcuts(dict, flags, newPos);
- newPos = skipBigrams(dict, flags, newPos);
- return newPos;
-}
-
-AK_FORCE_INLINE int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
- const uint8_t flags, const int pos) {
- int currentPos = pos;
- currentPos = skipChildrenPosition(flags, currentPos);
- currentPos = skipAllAttributes(dict, flags, currentPos);
- return currentPos;
-}
-
-AK_FORCE_INLINE int BinaryFormat::readChildrenPosition(const uint8_t *const dict,
- const uint8_t flags, const int pos) {
- int offset = 0;
- switch (MASK_GROUP_ADDRESS_TYPE & flags) {
- case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
- offset = dict[pos];
- break;
- case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
- offset = dict[pos] << 8;
- offset += dict[pos + 1];
- break;
- case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
- offset = dict[pos] << 16;
- offset += dict[pos + 1] << 8;
- offset += dict[pos + 2];
- break;
- default:
- // If we come here, it means we asked for the children of a word with
- // no children.
- return -1;
- }
- return pos + offset;
-}
-
-inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
- return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
-}
-
-// This function gets the byte position of the last chargroup of the exact matching word in the
-// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS.
-AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
- const int *const inWord, const int length, const bool forceLowerCaseSearch) {
- int pos = 0;
- int wordPos = 0;
-
- while (true) {
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- if (wordPos >= length) return NOT_A_VALID_WORD_POS;
- int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
- const int wChar = forceLowerCaseSearch
- ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
- while (true) {
- // If there are no more character groups in this node, it means we could not
- // find a matching character for this depth, therefore there is no match.
- if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS;
- const int charGroupPos = pos;
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- if (character == wChar) {
- // This is the correct node. Only one character group may start with the same
- // char within a node, so either we found our match in this node, or there is
- // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
- // characters in this character group indeed does match.
- if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- while (NOT_A_CODE_POINT != character) {
- ++wordPos;
- // If we shoot the length of the word we search for, or if we find a single
- // character that does not match, as explained above, it means the word is
- // not in the dictionary (by virtue of this chargroup being the only one to
- // match the word on the first character, but not matching the whole word).
- if (wordPos >= length) return NOT_A_VALID_WORD_POS;
- if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
- character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- }
- // If we come here we know that so far, we do match. Either we are on a terminal
- // and we match the length, in which case we found it, or we traverse children.
- // If we don't match the length AND don't have children, then a word in the
- // dictionary fully matches a prefix of the searched word but not the full word.
- ++wordPos;
- if (FLAG_IS_TERMINAL & flags) {
- if (wordPos == length) {
- return charGroupPos;
- }
- pos = BinaryFormat::skipProbability(FLAG_IS_TERMINAL, pos);
- }
- if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
- return NOT_A_VALID_WORD_POS;
- }
- // We have children and we are still shorter than the word we are searching for, so
- // we need to traverse children. Put the pointer on the children position, and
- // break
- pos = BinaryFormat::readChildrenPosition(root, flags, pos);
- break;
- } else {
- // This chargroup does not match, so skip the remaining part and go to the next.
- if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- pos = BinaryFormat::skipOtherCharacters(root, pos);
- }
- pos = BinaryFormat::skipProbability(flags, pos);
- pos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
- }
- --charGroupCount;
- }
- }
-}
-
-// This function searches for a terminal in the dictionary by its address.
-// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
-// for groups with children and compare the children address with the address we look for.
-// When we shoot the address we look for, it means the word we look for is in the children
-// of the previous group. The only tricky part is the fact that if we arrive at the end of a
-// node with the last group's children address still less than what we are searching for, we
-// must descend the last group's children (for example, if the word we are searching for starts
-// with a z, it's the last group of the root node, so all children addresses will be smaller
-// than the address we look for, and we have to descend the z node).
-/* Parameters :
- * root: the dictionary buffer
- * address: the byte position of the last chargroup of the word we are searching for (this is
- * what is stored as the "bigram address" in each bigram)
- * outword: an array to write the found word, with MAX_WORD_LENGTH size.
- * outUnigramProbability: a pointer to an int to write the probability into.
- * Return value : the length of the word, of 0 if the word was not found.
- */
-AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
- const uint8_t *const root, const int nodePos, const int maxCodePointCount,
- int *const outCodePoints, int *const outUnigramProbability) {
- int pos = 0;
- int wordPos = 0;
-
- // One iteration of the outer loop iterates through nodes. As stated above, we will only
- // traverse nodes that are actually a part of the terminal we are searching, so each time
- // we enter this loop we are one depth level further than last time.
- // The only reason we count nodes is because we want to reduce the probability of infinite
- // looping in case there is a bug. Since we know there is an upper bound to the depth we are
- // supposed to traverse, it does not hurt to count iterations.
- for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
- int lastCandidateGroupPos = 0;
- // Let's loop through char groups in this node searching for either the terminal
- // or one of its ascendants.
- for (int charGroupCount = getGroupCountAndForwardPointer(root, &pos); charGroupCount > 0;
- --charGroupCount) {
- const int startPos = pos;
- const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
- const int character = getCodePointAndForwardPointer(root, &pos);
- if (nodePos == startPos) {
- // We found the address. Copy the rest of the word in the buffer and return
- // the length.
- outCodePoints[wordPos] = character;
- if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- int nextChar = getCodePointAndForwardPointer(root, &pos);
- // We count chars in order to avoid infinite loops if the file is broken or
- // if there is some other bug
- int charCount = maxCodePointCount;
- while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
- outCodePoints[++wordPos] = nextChar;
- nextChar = getCodePointAndForwardPointer(root, &pos);
- }
- }
- *outUnigramProbability = readProbabilityWithoutMovingPointer(root, pos);
- return ++wordPos;
- }
- // We need to skip past this char group, so skip any remaining chars after the
- // first and possibly the probability.
- if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- pos = skipOtherCharacters(root, pos);
- }
- pos = skipProbability(flags, pos);
-
- // The fact that this group has children is very important. Since we already know
- // that this group does not match, if it has no children we know it is irrelevant
- // to what we are searching for.
- const bool hasChildren = (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
- (MASK_GROUP_ADDRESS_TYPE & flags));
- // We will write in `found' whether we have passed the children address we are
- // searching for. For example if we search for "beer", the children of b are less
- // than the address we are searching for and the children of c are greater. When we
- // come here for c, we realize this is too big, and that we should descend b.
- bool found;
- if (hasChildren) {
- // Here comes the tricky part. First, read the children position.
- const int childrenPos = readChildrenPosition(root, flags, pos);
- if (childrenPos > nodePos) {
- // If the children pos is greater than address, it means the previous chargroup,
- // which address is stored in lastCandidateGroupPos, was the right one.
- found = true;
- } else if (1 >= charGroupCount) {
- // However if we are on the LAST group of this node, and we have NOT shot the
- // address we should descend THIS node. So we trick the lastCandidateGroupPos
- // so that we will descend this node, not the previous one.
- lastCandidateGroupPos = startPos;
- found = true;
- } else {
- // Else, we should continue looking.
- found = false;
- }
- } else {
- // Even if we don't have children here, we could still be on the last group of this
- // node. If this is the case, we should descend the last group that had children,
- // and their address is already in lastCandidateGroup.
- found = (1 >= charGroupCount);
- }
-
- if (found) {
- // Okay, we found the group we should descend. Its address is in
- // the lastCandidateGroupPos variable, so we just re-read it.
- if (0 != lastCandidateGroupPos) {
- const uint8_t lastFlags =
- getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
- const int lastChar =
- getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
- // We copy all the characters in this group to the buffer
- outCodePoints[wordPos] = lastChar;
- if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
- int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
- int charCount = maxCodePointCount;
- while (-1 != nextChar && --charCount > 0) {
- outCodePoints[++wordPos] = nextChar;
- nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
- }
- }
- ++wordPos;
- // Now we only need to branch to the children address. Skip the probability if
- // it's there, read pos, and break to resume the search at pos.
- lastCandidateGroupPos = skipProbability(lastFlags, lastCandidateGroupPos);
- pos = readChildrenPosition(root, lastFlags, lastCandidateGroupPos);
- break;
- } else {
- // Here is a little tricky part: we come here if we found out that all children
- // addresses in this group are bigger than the address we are searching for.
- // Should we conclude the word is not in the dictionary? No! It could still be
- // one of the remaining chargroups in this node, so we have to keep looking in
- // this node until we find it (or we realize it's not there either, in which
- // case it's actually not in the dictionary). Pass the end of this group, ready
- // to start the next one.
- pos = skipChildrenPosAndAttributes(root, flags, pos);
- }
- } else {
- // If we did not find it, we should record the last children address for the next
- // iteration.
- if (hasChildren) lastCandidateGroupPos = startPos;
- // Now skip the end of this group (children pos and the attributes if any) so that
- // our pos is after the end of this char group, at the start of the next one.
- pos = skipChildrenPosAndAttributes(root, flags, pos);
- }
-
- }
- }
- // If we have looked through all the chargroups and found no match, the address is
- // not the address of a terminal in this dictionary.
- return 0;
-}
-
-} // namespace latinime
-#endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
deleted file mode 100644
index c0df89f49..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
-#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
-
-namespace latinime {
-
-class DictionaryStructurePolicy;
-
-class DictionaryStructurePolicyFactory {
- public:
- static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
- switch (dictionaryFormat) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return PatriciaTriePolicy::getInstance();
- case BinaryDictionaryFormatUtils::VERSION_3:
- return DynamicPatriciaTriePolicy::getInstance();
- default:
- ASSERT(false);
- return 0;
- }
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
-};
-} // namespace latinime
-#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..434858d67
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(const char *const path, const int pathLength,
+ const int bufOffset, const int size, const bool isUpdatable) {
+ // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
+ // impl classes of DictionaryStructureWithBufferPolicy.
+ const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset,
+ size, isUpdatable);
+ if (!mmapedBuffer) {
+ return 0;
+ }
+ switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
+ mmapedBuffer->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ return new PatriciaTriePolicy(mmapedBuffer);
+ case FormatUtils::VERSION_3:
+ return new DynamicPatriciaTriePolicy(mmapedBuffer);
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number");
+ delete mmapedBuffer;
+ ASSERT(false);
+ return 0;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
new file mode 100644
index 000000000..1cb7a89c4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicyFactory {
+ public:
+ static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
+ const char *const path, const int pathLength, const int bufOffset, const int size,
+ const bool isUpdatable);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 7ac635a00..6bb90fc2d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -16,48 +16,54 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
- const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = nodePos;
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos);
+ DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos);
+ dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
} else {
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictRoot, mFlags, MAX_WORD_LENGTH, &pos);
+ dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
}
if (isTerminal()) {
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
} else {
mProbability = NOT_A_PROBABILITY;
}
- if (hasChildren()) {
- mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictRoot, mFlags, &pos);
- } else {
- mChildrenPos = NOT_A_DICT_POS;
+ mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, mFlags, &pos);
+ if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
+ mChildrenPos += mBuffer->getOriginalBufferSize();
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
mShortcutPos = pos;
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
+ mShortcutsPolicy->skipAllShortcuts(&pos);
} else {
mShortcutPos = NOT_A_DICT_POS;
}
if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
mBigramPos = pos;
- BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
- mBinaryDictionaryInfo, &pos);
+ mBigramsPolicy->skipAllBigrams(&pos);
} else {
mBigramPos = NOT_A_DICT_POS;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index 71558edaa..acc68b321 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -17,13 +17,17 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+#include <stdint.h>
+
#include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime {
-class BinaryDictionaryInfo;
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
/*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@@ -31,12 +35,14 @@ class BinaryDictionaryInfo;
*/
class DynamicPatriciaTrieNodeReader {
public:
- explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS),
- mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
- mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_VALID_WORD_POS) {}
+ DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
+ mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+ mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
~DynamicPatriciaTrieNodeReader() {}
@@ -63,7 +69,7 @@ class DynamicPatriciaTrieNodeReader {
}
AK_FORCE_INLINE bool hasChildren() const {
- return PatriciaTrieReadingUtils::hasChildrenInFlags(mFlags);
+ return mChildrenPos != NOT_A_DICT_POS;
}
AK_FORCE_INLINE bool isTerminal() const {
@@ -116,7 +122,9 @@ class DynamicPatriciaTrieNodeReader {
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ const BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 3df505688..3b9878b82 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -19,188 +19,127 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime {
-const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance;
-// To avoid infinite loop caused by invalid or malicious forward links.
-const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
+ DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
- int mergedNodeCodePoints[MAX_WORD_LENGTH];
- int nextPos = dicNode->getChildrenPos();
- int totalChildCount = 0;
- do {
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), &nextPos);
- totalChildCount += childCount;
- if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
- // Invalid dictionary.
- AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
- childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
- ASSERT(false);
- return;
- }
- for (int i = 0; i < childCount; i++) {
- nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nextPos, MAX_WORD_LENGTH,
- mergedNodeCodePoints);
- if (!nodeReader.isDeleted() && !nodeFilter->isFilteredOut(mergedNodeCodePoints[0])) {
- // Push child node when the node is not deleted and not filtered out.
- childDicNodes->pushLeavingChild(dicNode, nodeReader.getNodePos(),
- nodeReader.getChildrenPos(), nodeReader.getProbability(),
- nodeReader.isTerminal(), nodeReader.hasChildren(),
- nodeReader.isBlacklisted() || nodeReader.isNotAWord(),
- nodeReader.getCodePointCount(), mergedNodeCodePoints);
- }
- nextPos = nodeReader.getSiblingNodePos();
- }
- nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
- binaryDictionaryInfo->getDictRoot(), nextPos);
- } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ while (!readingHelper.isEnd()) {
+ childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
+ nodeReader->getChildrenPos(), nodeReader->getProbability(),
+ nodeReader->isTerminal() && !nodeReader->isDeleted(),
+ nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
+ nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
+ readingHelper.readNextSiblingNode();
+ }
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount];
- int mergedNodeCodePoints[maxCodePointCount];
- int codePointCount = 0;
-
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
- // First, read terminal node and get its probability.
- nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
- mergedNodeCodePoints);
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ // First, read the terminal node and get its probability.
+ readingHelper.initWithNodePos(nodePos);
+ if (!readingHelper.isValidTerminalNode()) {
+ // Node at the nodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
// Store terminal node probability.
- *outUnigramProbability = nodeReader.getProbability();
- // Store terminal node code points to buffer in the reverse order.
- for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
- reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
- }
- // Then, follow parent pos toward the root node.
- while (nodeReader.getParentPos() != NOT_A_DICT_POS) {
- // codePointCount must be incremented at least once in each iteration to ensure preventing
- // infinite loop.
- if (nodeReader.isDeleted() || codePointCount > maxCodePointCount
- || nodeReader.getCodePointCount() <= 0) {
+ *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ while (!readingHelper.isEnd()) {
+ if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
// The nodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
- // Read parent node.
- nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodeReader.getParentPos(),
- maxCodePointCount, mergedNodeCodePoints);
// Store node code points to buffer in the reverse order.
- for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
- reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
- }
+ readingHelper.fetchMergedNodeCodePointsInReverseOrder(
+ readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
+ // Follow parent node toward the root node.
+ readingHelper.readParentNode();
+ }
+ if (readingHelper.isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
}
// Reverse the stored code points to output them.
+ const int codePointCount = readingHelper.getTotalCodePointCount();
for (int i = 0; i < codePointCount; ++i) {
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
}
return codePointCount;
}
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
}
- int mergedNodeCodePoints[MAX_WORD_LENGTH];
- int currentLength = 0;
- int pos = getRootPosition();
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
- while (currentLength <= length) {
- // When foundMatchedNode becomes true, currentLength is increased at least once.
- bool foundMatchedNode = false;
- int totalChildCount = 0;
- do {
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), &pos);
- totalChildCount += childCount;
- if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
- // Invalid dictionary.
- AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
- childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
- ASSERT(false);
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithNodeArrayPos(getRootPosition());
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ while (!readingHelper.isEnd()) {
+ const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
+ if (readingHelper.getTotalCodePointCount() > length
+ || !readingHelper.isMatchedCodePoint(0 /* index */,
+ searchCodePoints[matchedCodePointCount])) {
+ // Current node has too many code points or its first code point is different from
+ // target code point. Skip this node and read the next sibling node.
+ readingHelper.readNextSiblingNode();
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = nodeReader->getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ if (!readingHelper.isMatchedCodePoint(
+ j, searchCodePoints[matchedCodePointCount + j])) {
+ // Different code point is found. The given word is not included in the dictionary.
return NOT_A_VALID_WORD_POS;
}
- for (int i = 0; i < childCount; i++) {
- nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH,
- mergedNodeCodePoints);
- if (nodeReader.isDeleted() || nodeReader.getCodePointCount() <= 0) {
- // Skip deleted or empty node.
- pos = nodeReader.getSiblingNodePos();
- continue;
- }
- bool matched = true;
- for (int j = 0; j < nodeReader.getCodePointCount(); ++j) {
- if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) {
- // Different code point is found.
- matched = false;
- break;
- }
- }
- if (matched) {
- currentLength += nodeReader.getCodePointCount();
- if (length == currentLength) {
- // Terminal position is found.
- return nodeReader.getNodePos();
- }
- if (!nodeReader.hasChildren()) {
- return NOT_A_VALID_WORD_POS;
- }
- foundMatchedNode = true;
- // Advance to the children nodes.
- pos = nodeReader.getChildrenPos();
- break;
- }
- // Try next sibling node.
- pos = nodeReader.getSiblingNodePos();
- }
- if (foundMatchedNode) {
- break;
- }
- // If the matched node is not found in the current node group, try to follow the
- // forward link.
- pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
- binaryDictionaryInfo->getDictRoot(), pos);
- } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
- if (!foundMatchedNode) {
- // Matched node is not found.
+ }
+ // All characters are matched.
+ if (length == readingHelper.getTotalCodePointCount()) {
+ // Terminal position is found.
+ return nodeReader->getNodePos();
+ }
+ if (!nodeReader->hasChildren()) {
return NOT_A_VALID_WORD_POS;
}
+ // Advance to the children nodes.
+ readingHelper.readChildNode();
}
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
return NOT_A_VALID_WORD_POS;
}
-int DynamicPatriciaTriePolicy::getUnigramProbability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
+int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
@@ -208,13 +147,12 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(
return nodeReader.getProbability();
}
-int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const {
+int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
@@ -222,13 +160,12 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
return nodeReader.getShortcutPos();
}
-int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const {
+int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
@@ -236,4 +173,60 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(
return nodeReader.getBigramsPos();
}
+bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+ const int probability) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ return false;
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithNodeArrayPos(getRootPosition());
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
+}
+
+bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1, const int probability) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_VALID_WORD_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_VALID_WORD_POS) {
+ return false;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.addBigramWords(word0Pos, word1Pos, probability);
+}
+
+bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_VALID_WORD_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_VALID_WORD_POS) {
+ return false;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.removeBigramWords(word0Pos, word1Pos);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 6a7977138..5873d3d65 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -18,18 +18,29 @@
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
-class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
-class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy {
+class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() {
- return &sInstance;
+ DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
+ mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mBigramListPolicy(&mBufferWithExtendableBuffer),
+ mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
+
+ ~DynamicPatriciaTriePolicy() {
+ delete mBuffer;
}
AK_FORCE_INLINE int getRootPosition() const {
@@ -37,34 +48,49 @@ class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy {
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
+ DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
- int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ int getUnigramProbability(const int nodePos) const;
+
+ int getShortcutPositionOfNode(const int nodePos) const;
+
+ int getBigramsPositionOfNode(const int nodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramListPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability);
- int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability);
- int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1);
private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy);
- static const DynamicPatriciaTriePolicy sInstance;
- static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
- DynamicPatriciaTriePolicy() {}
- ~DynamicPatriciaTriePolicy() {}
+ const MmappedBuffer *const mBuffer;
+ const HeaderPolicy mHeaderPolicy;
+ BufferWithExtendableBuffer mBufferWithExtendableBuffer;
+ DynamicBigramListPolicy mBigramListPolicy;
+ DynamicShortcutListPolicy mShortcutListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
new file mode 100644
index 000000000..2042fcbd2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// To avoid infinite loop caused by invalid or malicious forward links.
+const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+
+// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
+// method to avoid an infinite loop.
+void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ mPos -= mBuffer->getOriginalBufferSize();
+ }
+ mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
+ &mPos);
+ if (usesAdditionalBuffer) {
+ mPos += mBuffer->getOriginalBufferSize();
+ }
+ // Count up nodes and node arrays to avoid infinite loop.
+ mTotalNodeCount += mNodeCount;
+ mNodeArrayCount++;
+ if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ // Invalid dictionary.
+ AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
+ "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
+ mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+ mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ ASSERT(false);
+ mIsError = true;
+ mPos = NOT_A_DICT_POS;
+ return;
+ }
+ if (mNodeCount == 0) {
+ // Empty node array. Try following forward link.
+ followForwardLink();
+ }
+}
+
+// Follow the forward link and read the next node array if exists.
+void DynamicPatriciaTrieReadingHelper::followForwardLink() {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ mPos -= mBuffer->getOriginalBufferSize();
+ }
+ const int forwardLinkPosition =
+ DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
+ if (usesAdditionalBuffer) {
+ mPos += mBuffer->getOriginalBufferSize();
+ }
+ if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
+ // Follow the forward link.
+ mPos = forwardLinkPosition;
+ nextNodeArray();
+ } else {
+ // All node arrays have been read.
+ mPos = NOT_A_DICT_POS;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
new file mode 100644
index 000000000..b108ed5fb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
+ * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
+ */
+class DynamicPatriciaTrieReadingHelper {
+ public:
+ DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
+ mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer),
+ mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
+
+ ~DynamicPatriciaTrieReadingHelper() {}
+
+ AK_FORCE_INLINE bool isError() const {
+ return mIsError;
+ }
+
+ AK_FORCE_INLINE bool isEnd() const {
+ return mPos == NOT_A_DICT_POS;
+ }
+
+ // Initialize reading state with the head position of a node array.
+ AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
+ if (nodeArrayPos == NOT_A_DICT_POS) {
+ mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mPos = nodeArrayPos;
+ mNodeCount = 0;
+ mPrevTotalCodePointCount = 0;
+ mTotalNodeCount = 0;
+ mNodeArrayCount = 0;
+ nextNodeArray();
+ if (!isEnd()) {
+ fetchNodeInfo();
+ }
+ }
+ }
+
+ // Initialize reading state with the head position of a node.
+ AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
+ // TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS
+ if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) {
+ mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mPos = nodePos;
+ mNodeCount = 1;
+ mPrevTotalCodePointCount = 0;
+ mTotalNodeCount = 1;
+ mNodeArrayCount = 1;
+ fetchNodeInfo();
+ }
+ }
+
+ AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
+ return &mNodeReader;
+ }
+
+ AK_FORCE_INLINE bool isValidTerminalNode() const {
+ return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
+ }
+
+ AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
+ return mMergedNodeCodePoints[index] == codePoint;
+ }
+
+ // Return code point count exclude the last read node's code points.
+ AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
+ return mPrevTotalCodePointCount;
+ }
+
+ // Return code point count include the last read node's code points.
+ AK_FORCE_INLINE int getTotalCodePointCount() const {
+ return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ }
+
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
+ const int index, int *const outCodePoints) const {
+ const int nodeCodePointCount = mNodeReader.getCodePointCount();
+ for (int i = 0; i < nodeCodePointCount; ++i) {
+ outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
+ }
+ }
+
+ AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
+ return mMergedNodeCodePoints;
+ }
+
+ AK_FORCE_INLINE void readNextSiblingNode() {
+ mNodeCount -= 1;
+ mPos = mNodeReader.getSiblingNodePos();
+ if (mNodeCount <= 0) {
+ // All nodes in the current node array have been read.
+ followForwardLink();
+ if (!isEnd()) {
+ fetchNodeInfo();
+ }
+ } else {
+ fetchNodeInfo();
+ }
+ }
+
+ // Read the first child node of the current node.
+ AK_FORCE_INLINE void readChildNode() {
+ if (mNodeReader.hasChildren()) {
+ mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mTotalNodeCount = 0;
+ mNodeArrayCount = 0;
+ mPos = mNodeReader.getChildrenPos();
+ // Read children node array.
+ nextNodeArray();
+ if (!isEnd()) {
+ fetchNodeInfo();
+ }
+ } else {
+ mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ // Read the parent node of the current node.
+ AK_FORCE_INLINE void readParentNode() {
+ if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
+ mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mTotalNodeCount = 1;
+ mNodeArrayCount = 1;
+ mNodeCount = 1;
+ mPos = mNodeReader.getParentPos();
+ fetchNodeInfo();
+ } else {
+ mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
+
+ static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+
+ bool mIsError;
+ int mPos;
+ // Node count of a node array.
+ int mNodeCount;
+ int mPrevTotalCodePointCount;
+ int mTotalNodeCount;
+ int mNodeArrayCount;
+ const BufferWithExtendableBuffer *const mBuffer;
+ DynamicPatriciaTrieNodeReader mNodeReader;
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH];
+
+ void nextNodeArray();
+
+ void followForwardLink();
+
+ AK_FORCE_INLINE void fetchNodeInfo() {
+ mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
+ mMergedNodeCodePoints);
+ if (mNodeReader.getCodePointCount() <= 0) {
+ // Empty node is not allowed.
+ mIsError = true;
+ mPos = NOT_A_DICT_POS;
+ }
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 0de6341b0..5d979fa51 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -17,7 +17,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -32,7 +32,13 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) {
const int base = *pos;
- return base + ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ if (offset == 0) {
+ // 0 offset means that the node does not have children.
+ return NOT_A_DICT_POS;
+ } else {
+ return base + offset;
+ }
} else {
return NOT_A_DICT_POS;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index 5398d7e37..a6cb46d39 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..128d69d88
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+
+namespace latinime {
+
+bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
+ DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability) {
+ while (!readingHelper->isEnd()) {
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode();
+ continue;
+ }
+ // Check following merged node code points.
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
+ const int nodeCodePointCount = nodeReader->getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount) {
+ // TODO: split current node after j - 1, create child and make this terminal.
+ return false;
+ }
+ if (!readingHelper->isMatchedCodePoint(j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ // TODO: split current node after j - 1 and create two children.
+ return false;
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount()) {
+ if (nodeReader->isTerminal()) {
+ // TODO: Update probability.
+ } else {
+ // TODO: Make it terminal and update probability.
+ }
+ return false;
+ }
+ if (!nodeReader->hasChildren()) {
+ // TODO: Create children node array and add new node as a child.
+ return false;
+ }
+ // Advance to the children nodes.
+ readingHelper->readChildNode();
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ // TODO: add at the last position of the node array.
+ return false;
+}
+
+bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+ const int probability) {
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoFromBuffer(word0Pos);
+ if (nodeReader.isDeleted()) {
+ return false;
+ }
+ // TODO: Implement.
+ return false;
+}
+
+// Remove a bigram relation from word0Pos to word1Pos.
+bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoFromBuffer(word0Pos);
+ if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ return false;
+ }
+ // TODO: Implement.
+ return false;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..f8165fc3d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicBigramListPolicy;
+class DynamicPatriciaTrieReadingHelper;
+class DynamicShortcutListPolicy;
+
+class DynamicPatriciaTrieWritingHelper {
+ public:
+ DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy *const bigramPolicy,
+ DynamicShortcutListPolicy *const shortcutPolicy)
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+
+ ~DynamicPatriciaTrieWritingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability);
+
+ // Add a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability);
+
+ // Remove a bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
+
+ BufferWithExtendableBuffer *const mBuffer;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ DynamicShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
new file mode 100644
index 000000000..eb828b58c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+
+namespace latinime {
+
+const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
+ "MULTIPLE_WORDS_DEMOTION_RATE";
+const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
+const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+
+float HeaderPolicy::readMultiWordCostMultiplier() const {
+ const int headerValue = HeaderReadingUtils::readHeaderValueInt(
+ mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
+ if (headerValue == S_INT_MIN) {
+ // not found
+ return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
+ }
+ if (headerValue <= 0) {
+ return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ }
+ return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 240512bce..e3e6fc077 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -14,38 +14,40 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H
-#define LATINIME_BINARY_DICTIONARY_HEADER_H
+#ifndef LATINIME_HEADER_POLICY_H
+#define LATINIME_HEADER_POLICY_H
+
+#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
namespace latinime {
-class BinaryDictionaryInfo;
-
-/**
- * This class abstracts dictionary header structures and provide interface to access dictionary
- * header information.
- */
-class BinaryDictionaryHeader {
+class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
- explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+ explicit HeaderPolicy(const uint8_t *const dictBuf)
+ : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)),
+ mSize(HeaderReadingUtils::getHeaderSize(dictBuf)),
+ mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
+
+ ~HeaderPolicy() {}
AK_FORCE_INLINE int getSize() const {
return mSize;
}
AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return BinaryDictionaryHeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
+ return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return BinaryDictionaryHeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return BinaryDictionaryHeaderReadingUtils::requiresFrenchLigatureProcessing(
+ return HeaderReadingUtils::requiresFrenchLigatureProcessing(
mDictionaryFlags);
}
@@ -60,7 +62,7 @@ class BinaryDictionaryHeader {
outValue[0] = '\0';
return;
}
- if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
+ if (!HeaderReadingUtils::readHeaderValue(mDictBuf,
key, outValue, outValueSize)) {
outValue[0] = '?';
outValue[1] = '\0';
@@ -68,18 +70,19 @@ class BinaryDictionaryHeader {
}
private:
- DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER;
static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags;
+ const uint8_t *const mDictBuf;
+ const HeaderReadingUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
const float mMultiWordCostMultiplier;
float readMultiWordCostMultiplier() const;
};
+
} // namespace latinime
-#endif // LATINIME_BINARY_DICTIONARY_HEADER_H
+#endif /* LATINIME_HEADER_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
new file mode 100644
index 000000000..f323876c4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
+
+#include <cctype>
+#include <cstdlib>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
+
+const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
+const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2;
+const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4;
+
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) {
+ // See the format of the header in the comment in
+ // BinaryDictionaryFormatUtils::detectFormatVersion()
+ return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
+ + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
+}
+
+/* static */ HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) {
+ return ByteArrayUtils::readUint16(dictBuf,
+ HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
+}
+
+// Returns if the key is found or not and reads the found value into outValue.
+/* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf,
+ const char *const key, int *outValue, const int outValueSize) {
+ if (outValueSize <= 0) {
+ return false;
+ }
+ const int headerSize = getHeaderSize(dictBuf);
+ int pos = getHeaderOptionsPosition();
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return false;
+ }
+ while (pos < headerSize) {
+ if(ByteArrayUtils::compareStringInBufferWithCharArray(
+ dictBuf, key, headerSize - pos, &pos) == 0) {
+ // The key was found.
+ const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize,
+ outValue, &pos);
+ // Add a 0 terminator to the string.
+ outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
+ return true;
+ }
+ ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos);
+ }
+ // The key was not found.
+ return false;
+}
+
+/* static */ int HeaderReadingUtils::readHeaderValueInt(
+ const uint8_t *const dictBuf, const char *const key) {
+ const int bufferSize = LARGEST_INT_DIGIT_COUNT;
+ int intBuffer[bufferSize];
+ char charBuffer[bufferSize];
+ if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) {
+ return S_INT_MIN;
+ }
+ for (int i = 0; i < bufferSize; ++i) {
+ charBuffer[i] = intBuffer[i];
+ if (charBuffer[i] == '0') {
+ break;
+ }
+ if (!isdigit(charBuffer[i])) {
+ // If not a number, return S_INT_MIN
+ return S_INT_MIN;
+ }
+ }
+ return atoi(charBuffer);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
new file mode 100644
index 000000000..c94919640
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_READING_UTILS_H
+#define LATINIME_HEADER_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class HeaderReadingUtils {
+ public:
+ typedef uint16_t DictionaryFlags;
+
+ static const int MAX_OPTION_KEY_LENGTH;
+
+ static int getHeaderSize(const uint8_t *const dictBuf);
+
+ static DictionaryFlags getFlags(const uint8_t *const dictBuf);
+
+ static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
+ return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
+ return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
+ return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE int getHeaderOptionsPosition() {
+ return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ + HEADER_SIZE_FIELD_SIZE;
+ }
+
+ static bool readHeaderValue(const uint8_t *const dictBuf,
+ const char *const key, int *outValue, const int outValueSize);
+
+ static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils);
+
+ static const int HEADER_MAGIC_NUMBER_SIZE;
+ static const int HEADER_DICTIONARY_VERSION_SIZE;
+ static const int HEADER_FLAG_SIZE;
+ static const int HEADER_SIZE_FIELD_SIZE;
+
+ static const DictionaryFlags NO_FLAGS;
+ // Flags for special processing
+ // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
+ // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+ static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
+ static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
+ static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
+ static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+};
+}
+#endif /* LATINIME_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index 097f7c86a..adcf2dbdf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -20,55 +20,299 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-#include "suggest/policyimpl/dictionary/binary_format.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime {
-const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
-
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
+ DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
int nextPos = dicNode->getChildrenPos();
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), &nextPos);
+ const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictRoot, &nextPos);
for (int i = 0; i < childCount; i++) {
- nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
- nodeFilter, childDicNodes);
+ nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
}
}
+// This retrieves code points and the probability of the word by its terminal position.
+// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
+// it is possible to check for this with advantageous complexity. For each node, we search
+// for PtNodes with children and compare the children position with the position we look for.
+// When we shoot the position we look for, it means the word we look for is in the children
+// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
+// PtNode array with the last PtNode's children position still less than what we are searching for,
+// we must descend the last PtNode's children (for example, if the word we are searching for starts
+// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
+// than the position we look for, and we have to descend the z node).
+/* Parameters :
+ * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
+ * what is stored as the "bigram position" in each bigram)
+ * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
+ * outUnigramProbability: a pointer to an int to write the probability into.
+ * Return value : the code point count, of 0 if the word was not found.
+ */
+// TODO: Split this function to be more readable
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
- binaryDictionaryInfo->getDictRoot(), nodePos,
- maxCodePointCount, outCodePoints, outUnigramProbability);
+ int pos = getRootPosition();
+ int wordPos = 0;
+ // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
+ // only traverse nodes that are actually a part of the terminal we are searching, so each time
+ // we enter this loop we are one depth level further than last time.
+ // The only reason we count nodes is because we want to reduce the probability of infinite
+ // looping in case there is a bug. Since we know there is an upper bound to the depth we are
+ // supposed to traverse, it does not hurt to count iterations.
+ for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
+ int lastCandidatePtNodePos = 0;
+ // Let's loop through PtNodes in this PtNode array searching for either the terminal
+ // or one of its ascendants.
+ for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
+ const int startPos = pos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ if (nodePos == startPos) {
+ // We found the position. Copy the rest of the code points in the buffer and return
+ // the length.
+ outCodePoints[wordPos] = character;
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ // We count code points in order to avoid infinite loops if the file is broken
+ // or if there is some other bug
+ int charCount = maxCodePointCount;
+ while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
+ outCodePoints[++wordPos] = nextChar;
+ nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ }
+ }
+ *outUnigramProbability =
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+ &pos);
+ return ++wordPos;
+ }
+ // We need to skip past this PtNode, so skip any remaining code points after the
+ // first and possibly the probability.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ }
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ // The fact that this PtNode has children is very important. Since we already know
+ // that this PtNode does not match, if it has no children we know it is irrelevant
+ // to what we are searching for.
+ const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
+ // We will write in `found' whether we have passed the children position we are
+ // searching for. For example if we search for "beer", the children of b are less
+ // than the address we are searching for and the children of c are greater. When we
+ // come here for c, we realize this is too big, and that we should descend b.
+ bool found;
+ if (hasChildren) {
+ int currentPos = pos;
+ // Here comes the tricky part. First, read the children position.
+ const int childrenPos = PatriciaTrieReadingUtils
+ ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
+ if (childrenPos > nodePos) {
+ // If the children pos is greater than the position, it means the previous
+ // PtNode, which position is stored in lastCandidatePtNodePos, was the right
+ // one.
+ found = true;
+ } else if (1 >= ptNodeCount) {
+ // However if we are on the LAST PtNode of this array, and we have NOT shot the
+ // position we should descend THIS node. So we trick the lastCandidatePtNodePos
+ // so that we will descend this PtNode, not the previous one.
+ lastCandidatePtNodePos = startPos;
+ found = true;
+ } else {
+ // Else, we should continue looking.
+ found = false;
+ }
+ } else {
+ // Even if we don't have children here, we could still be on the last PtNode of /
+ // this array. If this is the case, we should descend the last PtNode that had
+ // children, and their position is already in lastCandidatePtNodePos.
+ found = (1 >= ptNodeCount);
+ }
+
+ if (found) {
+ // Okay, we found the PtNode we should descend. Its position is in
+ // the lastCandidatePtNodePos variable, so we just re-read it.
+ if (0 != lastCandidatePtNodePos) {
+ const PatriciaTrieReadingUtils::NodeFlags lastFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ // We copy all the characters in this PtNode to the buffer
+ outCodePoints[wordPos] = lastChar;
+ if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
+ int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ int charCount = maxCodePointCount;
+ while (-1 != nextChar && --charCount > 0) {
+ outCodePoints[++wordPos] = nextChar;
+ nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ }
+ }
+ ++wordPos;
+ // Now we only need to branch to the children address. Skip the probability if
+ // it's there, read pos, and break to resume the search at pos.
+ if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+ &lastCandidatePtNodePos);
+ }
+ pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, lastFlags, &lastCandidatePtNodePos);
+ break;
+ } else {
+ // Here is a little tricky part: we come here if we found out that all children
+ // addresses in this PtNode are bigger than the address we are searching for.
+ // Should we conclude the word is not in the dictionary? No! It could still be
+ // one of the remaining PtNodes in this array, so we have to keep looking in
+ // this array until we find it (or we realize it's not there either, in which
+ // case it's actually not in the dictionary). Pass the end of this PtNode,
+ // ready to start the next one.
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+ } else {
+ // If we did not find it, we should record the last children address for the next
+ // iteration.
+ if (hasChildren) lastCandidatePtNodePos = startPos;
+ // Now skip the end of this PtNode (children pos and the attributes if any) so that
+ // our pos is after the end of this PtNode, at the start of the next one.
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+
+ }
+ }
+ // If we have looked through all the PtNodes and found no match, the nodePos is
+ // not the position of a terminal in this dictionary.
+ return 0;
}
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+// This function gets the position of the terminal node of the exact matching word in the
+// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS.
+int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
- length, forceLowerCaseSearch);
+ int pos = getRootPosition();
+ int wordPos = 0;
+
+ while (true) {
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ if (wordPos >= length) return NOT_A_VALID_WORD_POS;
+ int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
+ &pos);
+ const int wChar = forceLowerCaseSearch
+ ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
+ while (true) {
+ // If there are no more PtNodes in this array, it means we could not
+ // find a matching character for this depth, therefore there is no match.
+ if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS;
+ const int ptNodePos = pos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
+ &pos);
+ if (character == wChar) {
+ // This is the correct PtNode. Only one PtNode may start with the same char within
+ // a PtNode array, so either we found our match in this array, or there is
+ // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
+ // characters in this PtNode indeed does match.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
+ &pos);
+ while (NOT_A_CODE_POINT != character) {
+ ++wordPos;
+ // If we shoot the length of the word we search for, or if we find a single
+ // character that does not match, as explained above, it means the word is
+ // not in the dictionary (by virtue of this PtNode being the only one to
+ // match the word on the first character, but not matching the whole word).
+ if (wordPos >= length) return NOT_A_VALID_WORD_POS;
+ if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
+ character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ }
+ }
+ // If we come here we know that so far, we do match. Either we are on a terminal
+ // and we match the length, in which case we found it, or we traverse children.
+ // If we don't match the length AND don't have children, then a word in the
+ // dictionary fully matches a prefix of the searched word but not the full word.
+ ++wordPos;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ if (wordPos == length) {
+ return ptNodePos;
+ }
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ return NOT_A_VALID_WORD_POS;
+ }
+ // We have children and we are still shorter than the word we are searching for, so
+ // we need to traverse children. Put the pointer on the children position, and
+ // break
+ pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
+ flags, &pos);
+ break;
+ } else {
+ // This PtNode does not match, so skip the remaining part and go to the next.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
+ &pos);
+ }
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
+ flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+ --ptNodeCount;
+ }
+ }
}
-int PatriciaTriePolicy::getUnigramProbability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
+int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
- const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
return NOT_A_PROBABILITY;
}
@@ -79,90 +323,79 @@ int PatriciaTriePolicy::getUnigramProbability(
// for shortcuts).
return NOT_A_PROBABILITY;
}
- PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
- return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
-int PatriciaTriePolicy::getShortcutPositionOfNode(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const {
+int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
return NOT_A_DICT_POS;
}
- PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
}
return pos;
}
-int PatriciaTriePolicy::getBigramsPositionOfNode(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const {
+int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
return NOT_A_DICT_POS;
}
- PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
+ mShortcutListPolicy.skipAllShortcuts(&pos);;
}
return pos;
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
- const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
- const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
+ const int nodePos, DicNodeVector *childDicNodes) const {
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
+ mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
: NOT_A_PROBABILITY;
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictRoot, flags, &pos) : NOT_A_DICT_POS;
+ mDictRoot, flags, &pos) : NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
+ getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
- binaryDictionaryInfo, &pos);
- }
- if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
- childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
- PatriciaTrieReadingUtils::isTerminal(flags),
- PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
- PatriciaTrieReadingUtils::isBlacklisted(flags) ||
- PatriciaTrieReadingUtils::isNotAWord(flags),
- mergedNodeCodePointCount, mergedNodeCodePoints);
+ getBigramsStructurePolicy()->skipAllBigrams(&pos);
}
+ childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
+ PatriciaTrieReadingUtils::isTerminal(flags),
+ PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags) ||
+ PatriciaTrieReadingUtils::isNotAWord(flags),
+ mergedNodeCodePointCount, mergedNodeCodePoints);
return pos;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 71f256eee..d0567fd85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -17,15 +17,29 @@
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
#define LATINIME_PATRICIA_TRIE_POLICY_H
+#include <stdint.h>
+
#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
-class PatriciaTriePolicy : public DictionaryStructurePolicy {
+class DicNode;
+class DicNodeVector;
+
+class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
- return &sInstance;
+ PatriciaTriePolicy(const MmappedBuffer *const buffer)
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+
+ ~PatriciaTriePolicy() {
+ delete mBuffer;
}
AK_FORCE_INLINE int getRootPosition() const {
@@ -33,37 +47,64 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
+ DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
- int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ int getUnigramProbability(const int nodePos) const;
+
+ int getShortcutPositionOfNode(const int nodePos) const;
- int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ int getBigramsPositionOfNode(const int nodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramListPolicy;
+ }
- int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int nodePos) const;
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
private:
- DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
- static const PatriciaTriePolicy sInstance;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- PatriciaTriePolicy() {}
- ~PatriciaTriePolicy() {}
+ const MmappedBuffer *const mBuffer;
+ const HeaderPolicy mHeaderPolicy;
+ const uint8_t *const mDictRoot;
+ const BigramListPolicy mBigramListPolicy;
+ const ShortcutListPolicy mShortcutListPolicy;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
+ DicNodeVector *const childDicNodes) const;
};
} // namespace latinime
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 89e981df8..576a158bc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -17,21 +17,21 @@
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
typedef PatriciaTrieReadingUtils PtReadingUtils;
-const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;
// Flag for single/multiple char group
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
-// Flag for terminal groups
+// Flag for terminal PtNodes
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
// Flag for shortcut targets presence
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
@@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
const int base = *pos;
int offset = 0;
- switch (MASK_GROUP_ADDRESS_TYPE & flags) {
- case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ switch (MASK_CHILDREN_POSITION_TYPE & flags) {
+ case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE:
offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
break;
- case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES:
offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
break;
- case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES:
offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
break;
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
index 002c3f19b..f76c38751 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
- static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
+ static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
if (firstByte < 0x80) {
@@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils {
}
static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
- return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags);
+ return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
- static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+ static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
static const NodeFlags FLAG_IS_TERMINAL;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
new file mode 100644
index 000000000..5e9c52950
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
+ */
+class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ explicit DynamicShortcutListPolicy(BufferWithExtendableBuffer *const buffer)
+ : mBuffer(buffer) {}
+
+ ~DynamicShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ buffer, maxCodePointCount, outCodePoint, pos);
+ }
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(buffer, pos);
+ *pos += shortcutListSize;
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ // Copy shortcuts from the shortcut list that starts at fromPos to toPos and advance these
+ // positions after the shortcut lists.
+ void copyAllShortcuts(int *const fromPos, int *const toPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *fromPos -= mBuffer->getOriginalBufferSize();
+ }
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(buffer, fromPos);
+ // Copy shortcut list size.
+ mBuffer->writeUintAndAdvancePosition(
+ shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
+ ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos);
+ for (int i = 0; i < shortcutListSize; ++i) {
+ const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(buffer, fromPos);
+ mBuffer->writeUintAndAdvancePosition(data, 1 /* size */, toPos);
+ }
+ if (usesAdditionalBuffer) {
+ *fromPos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
+
+ BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
new file mode 100644
index 000000000..d73f73953
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_POLICY_H
+#define LATINIME_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ explicit ShortcutListPolicy(const uint8_t *const shortcutBuf)
+ : mShortcutsBuf(shortcutBuf) {}
+
+ ~ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int listPos = pos;
+ ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos);
+ return listPos;
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ mShortcutsBuf, maxCodePointCount, outCodePoint, pos);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
+ *pos += shortcutListSize;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy);
+
+ const uint8_t *const mShortcutsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
new file mode 100644
index 000000000..e70bb5071
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+// Flag for presence of more attributes
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
+// The numeric value of the shortcut probability that means 'whitelist'.
+const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
new file mode 100644
index 000000000..5f4f240f5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H
+#define LATINIME_SHORTCUT_LIST_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+class ShortcutListReadingUtils {
+ public:
+ typedef uint8_t ShortcutFlags;
+
+ static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
+ }
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // This method returns the size of the shortcut list region excluding the shortcut list size
+ // field at the beginning.
+ static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
+ return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos)
+ - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ static AK_FORCE_INLINE int getShortcutListSizeFieldSize() {
+ return SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) {
+ const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos);
+ *pos += shortcutListSize;
+ }
+
+ static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+ return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE int readShortcutTarget(
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord,
+ int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils);
+
+ static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ static const int WHITELIST_SHORTCUT_PROBABILITY;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
new file mode 100644
index 000000000..8582c4b81
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
+const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
new file mode 100644
index 000000000..ec871ec85
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+#define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+
+#include <cstddef>
+#include <stdint.h>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// This is used as a buffer that can be extended for updatable dictionaries.
+// To optimize performance, raw pointer is directly used for reading buffer. The position has to be
+// adjusted to access additional buffer. On the other hand, this class does not provide writable
+// raw pointer but provides several methods that handle boundary checking for writing data.
+class BufferWithExtendableBuffer {
+ public:
+ BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
+ : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
+ mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
+
+ AK_FORCE_INLINE int getTailPosition() const {
+ return mOriginalBufferSize + mUsedAdditionalBufferSize;
+ }
+
+ /**
+ * For reading.
+ */
+ AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
+ return position >= mOriginalBufferSize;
+ }
+
+ // CAVEAT!: Be careful about array out of bound access with buffers
+ AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
+ if (usesAdditionalBuffer) {
+ return &mAdditionalBuffer[0];
+ } else {
+ return mOriginalBuffer;
+ }
+ }
+
+ AK_FORCE_INLINE int getOriginalBufferSize() const {
+ return mOriginalBufferSize;
+ }
+
+ /**
+ * For writing.
+ *
+ * Writing is allowed for original buffer, already written region of additional buffer and the
+ * tail of additional buffer.
+ */
+ AK_FORCE_INLINE bool writeUintAndAdvancePosition(const uint32_t data, const int size,
+ int *const pos) {
+ if (!(size >= 1 && size <= 4)) {
+ AKLOGI("writeUintAndAdvancePosition() is called with invalid size: %d", size);
+ ASSERT(false);
+ return false;
+ }
+ if (!checkAndPrepareWriting(*pos, size)) {
+ return false;
+ }
+ const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
+ uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ if (usesAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
+
+ static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
+ static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
+ static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+
+ uint8_t *const mOriginalBuffer;
+ const int mOriginalBufferSize;
+ std::vector<uint8_t> mAdditionalBuffer;
+ int mUsedAdditionalBufferSize;
+
+ // Return if the buffer is successfully extended or not.
+ AK_FORCE_INLINE bool extendBuffer() {
+ if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
+ > MAX_ADDITIONAL_BUFFER_SIZE) {
+ return false;
+ }
+ mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
+ return true;
+ }
+
+ // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
+ // the additional buffer, try extending the buffer.
+ AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size) {
+ if (isInAdditionalBuffer(pos)) {
+ if (pos == mUsedAdditionalBufferSize) {
+ // Append data to the tail.
+ if (pos + size > static_cast<int>(mAdditionalBuffer.size())) {
+ // Need to extend buffer.
+ if (!extendBuffer()) {
+ return false;
+ }
+ }
+ mUsedAdditionalBufferSize += size;
+ } else if (pos + size >= mUsedAdditionalBufferSize) {
+ // The access will beyond the tail of used region.
+ return false;
+ }
+ } else {
+ if (pos < 0 || mOriginalBufferSize < pos + size) {
+ // Invalid position or violate the boundary.
+ return false;
+ }
+ }
+ return true;
+ }
+};
+}
+#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
index 68b1d5d15..a84cfb9d5 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 75ccfc766..1d14929c7 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -29,7 +29,34 @@ namespace latinime {
class ByteArrayUtils {
public:
/**
- * Integer
+ * Integer writing
+ *
+ * Each method write a corresponding size integer in a big endian manner.
+ */
+ static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, const int size, int *const pos) {
+ // size must be in 1 to 4.
+ ASSERT(size >= 1 && size <= 4);
+ switch (size) {
+ case 1:
+ ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
+ return;
+ case 2:
+ ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
+ return;
+ case 3:
+ ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
+ return;
+ case 4:
+ ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
+ return;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * Integer reading
*
* Each method read a corresponding size integer in a big endian manner.
*/
@@ -187,6 +214,32 @@ class ByteArrayUtils {
static const uint8_t MINIMAL_ONE_BYTE_CHARACTER_VALUE;
static const uint8_t CHARACTER_ARRAY_TERMINATOR;
+
+ static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 24) & 0xFF;
+ buffer[(*pos)++] = (data >> 16) & 0xFF;
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 16) & 0xFF;
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
+ const uint16_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
+ const uint8_t data, int *const pos) {
+ buffer[(*pos)++] = data & 0xFF;
+ }
};
} // namespace latinime
#endif /* LATINIME_BYTE_ARRAY_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 0e8d72f2e..3796c7b7b 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -14,7 +14,9 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -22,22 +24,19 @@ namespace latinime {
* Dictionary size
*/
// Any file smaller than this is not a dictionary.
-const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
/**
* Format versions
*/
-
-// The versions of Latin IME that only handle format version 1 only test for the magic
-// number, so we had to change it so that version 2 files would be rejected by older
-// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported
+// or obsolete dictionary formats.
+const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
+const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
-/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
- BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
- const int dictSize) {
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
+ const uint8_t *const dict, const int dictSize) {
// The magic number is stored big-endian.
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
// understand this format.
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 830684c70..f84321577 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -14,24 +14,19 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
+#ifndef LATINIME_FORMAT_UTILS_H
+#define LATINIME_FORMAT_UTILS_H
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime {
/**
* Methods to handle binary dictionary format version.
- *
- * Currently, we have a file with a similar name, binary_format.h. binary_format.h contains binary
- * reading methods and utility methods for various purposes.
- * On the other hand, this file deals with only about dictionary format version.
*/
-class BinaryDictionaryFormatUtils {
+class FormatUtils {
public:
enum FORMAT_VERSION {
VERSION_2,
@@ -42,11 +37,11 @@ class BinaryDictionaryFormatUtils {
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
-#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
+#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
new file mode 100644
index 000000000..6febd7832
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_MMAPPED_BUFFER_H
+#define LATINIME_MMAPPED_BUFFER_H
+
+#include <cerrno>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class MmappedBuffer {
+ public:
+ static MmappedBuffer* openBuffer(const char *const path, const int pathLength,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable) {
+ const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
+ const int mmapFd = open(path, openMode);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return 0;
+ }
+ const int pagesize = getpagesize();
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return 0;
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return 0;
+ }
+ return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
+ isUpdatable);
+ }
+
+ ~MmappedBuffer() {
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+ }
+
+ AK_FORCE_INLINE uint8_t *getBuffer() const {
+ return mBuffer;
+ }
+
+ AK_FORCE_INLINE int getBufferSize() const {
+ return mBufferSize;
+ }
+
+ AK_FORCE_INLINE bool isUpdatable() const {
+ return mIsUpdatable;
+ }
+
+ private:
+ AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
+ void *const mmappedBuffer, const int alignedSize, const int mmapFd,
+ const bool isUpdatable)
+ : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
+ mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
+
+ uint8_t *const mBuffer;
+ const int mBufferSize;
+ void *const mMmappedBuffer;
+ const int mAlignedSize;
+ const int mMmapFd;
+ const bool mIsUpdatable;
+};
+}
+#endif /* LATINIME_MMAPPED_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 7cddb0882..b6aa85896 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -155,7 +155,8 @@ class TypingWeighting : public Weighting {
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode,
MultiBigramMap *const multiBigramMap) const {
- return DicNodeUtils::getBigramNodeImprobability(traverseSession->getBinaryDictionaryInfo(),
+ return DicNodeUtils::getBigramNodeImprobability(
+ traverseSession->getDictionaryStructurePolicy(),
dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp
index 3406e0f8e..1f8ee0814 100644
--- a/native/jni/src/utils/autocorrection_threshold_utils.cpp
+++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp
@@ -83,9 +83,12 @@ const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;
return 0.0f;
}
+ if (score <= 0 || distance >= afterLength) {
+ // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0,
+ // or if the edit distance is larger than or equal to afterLength.
+ return 0.0f;
+ }
// add a weight based on edit distance.
- // distance <= max(afterLength, beforeLength) == afterLength,
- // so, 0 <= distance / afterLength <= 1
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
// TODO: Revise the following logic thoroughly by referring to...