aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Keisuke Kuroynagi <ksk@google.com> 2013-06-28 19:47:00 +0900
committer Keisuke Kuroynagi <ksk@google.com> 2013-06-28 19:47:00 +0900
commit c5e6efafff56c57c5527fe64dddb851df0719634 (patch)
tree 95a936112d1b4e2f3d55260f78c31ec9cc78a6ce
parent e04794cbbeafec7463a91554d7c069deacddc744 (diff)
download latinime-c5e6efafff56c57c5527fe64dddb851df0719634.tar.gz
latinime-c5e6efafff56c57c5527fe64dddb851df0719634.tar.xz
latinime-c5e6efafff56c57c5527fe64dddb851df0719634.zip
Introduce patriciaTrie to abstract traversing version 2 dictionary.
Bug: 6669677 Change-Id: Ifef72f3d7a7ba67c5232b98c7835485d72d7322d
-rw-r--r-- native/jni/Android.mk | 1
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node_utils.cpp | 6
-rw-r--r-- native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp | 11
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_dictionary_info.h | 16
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 21
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h | 47
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp | 70
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h | 58
8 files changed, 198 insertions, 32 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index d5df6b62e..f89eea735 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -70,6 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \
proximity_info_state_utils.cpp) \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
+ suggest/policyimpl/dictionary/patricia_trie_policy.cpp \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 2063c39ee..9bf7eceb5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -26,6 +26,7 @@
#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -36,14 +37,15 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(binaryDictionaryInfo->getRootPosition(), prevWordNodePos);
+ newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
+ prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, binaryDictionaryInfo->getRootPosition());
+ prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 242a9bdd6..ff304d2b2 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -150,11 +150,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return 0;
- const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
- int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
- forceLowerCaseSearch);
-
+ int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
if (NOT_VALID_WORD == pos) return 0;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
@@ -189,8 +188,8 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
- int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
- word1, length1, false /* forceLowerCaseSearch */);
+ int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == nextWordPos) return false;
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index c92123679..7cb31440a 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -22,11 +22,10 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
namespace latinime {
-class BinaryDictionaryHeader;
-
class BinaryDictionaryInfo {
public:
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
@@ -35,7 +34,9 @@ class BinaryDictionaryInfo {
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)),
- mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {}
+ mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
+ mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
+ mDictionaryFormat)) {}
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -61,10 +62,6 @@ class BinaryDictionaryInfo {
return mDictionaryFormat;
}
- AK_FORCE_INLINE int getRootPosition() const {
- return 0;
- }
-
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
return &mDictionaryHeader;
}
@@ -75,6 +72,10 @@ class BinaryDictionaryInfo {
return mIsUpdatable && isUpdatableDictionaryFormat;
}
+ AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
+ return mStructurePolicy;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
@@ -86,6 +87,7 @@ class BinaryDictionaryInfo {
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
+ const DictionaryStructurePolicy *const mStructurePolicy;
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 51f23dc55..675b54972 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -83,27 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
}
int Dictionary::getProbability(const int *word, int length) const {
- const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot();
- int pos = BinaryFormat::getTerminalPosition(root, word, length,
+ const DictionaryStructurePolicy *const structurePolicy =
+ mBinaryDictionaryInfo.getStructurePolicy();
+ int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == pos) {
return NOT_A_PROBABILITY;
}
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
- // If this is not a word, or if it's a blacklisted entry, it should behave as
- // having no probability outside of the suggestion process (where it should be used
- // for shortcuts).
- return NOT_A_PROBABILITY;
- }
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- if (hasMultipleChars) {
- pos = BinaryFormat::skipOtherCharacters(root, pos);
- } else {
- BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
- return unigramProbability;
+ return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
new file mode 100644
index 000000000..5070651cb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+
+namespace latinime {
+
+class DictionaryStructurePolicy;
+
+class DictionaryStructurePolicyFactory {
+ public:
+ static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
+ switch (dictionaryFormat) {
+ case BinaryDictionaryFormatUtils::VERSION_1:
+ // Fall through
+ case BinaryDictionaryFormatUtils::VERSION_2:
+ return PatriciaTriePolicy::getInstance();
+ default:
+ ASSERT(false);
+ return 0;
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
new file mode 100644
index 000000000..c995af98a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_format.h"
+
+namespace latinime {
+
+const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
+
+void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
+ // TODO: Move children creating methods from DicNodeUtils.
+}
+
+void PatriciaTriePolicy::getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int terminalNodePos, const int maxDepth, int *const outWord,
+ int *const outUnigramProbability) const {
+ BinaryFormat::getWordAtAddress(binaryDictionaryInfo->getDictRoot(), terminalNodePos,
+ maxDepth, outWord, outUnigramProbability);
+}
+
+int PatriciaTriePolicy::getTerminalNodePositionOfWord(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
+ length, forceLowerCaseSearch);
+}
+
+int PatriciaTriePolicy::getUnigramProbability(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
+ const uint8_t *const root = binaryDictionaryInfo->getDictRoot();
+ int pos = nodePos;
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no probability outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (hasMultipleChars) {
+ pos = BinaryFormat::skipOtherCharacters(root, pos);
+ } else {
+ BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ }
+ return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
new file mode 100644
index 000000000..9b9338145
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
+#define LATINIME_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
+
+namespace latinime {
+
+class PatriciaTriePolicy : public DictionaryStructurePolicy {
+ public:
+ static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
+ return &sInstance;
+ }
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildNodes(const DicNode *const dicNode,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
+
+ void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int terminalNodePos, const int maxDepth, int *const outWord,
+ int *const outUnigramProbability) const;
+
+ int getTerminalNodePositionOfWord(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int nodePos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
+ static const PatriciaTriePolicy sInstance;
+
+ PatriciaTriePolicy() {}
+ ~PatriciaTriePolicy() {}
+};
+} // namespace latinime
+#endif // LATINIME_PATRICIA_TRIE_POLICY_H