aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp31
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h19
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp27
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp49
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp123
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h105
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h73
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h55
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp39
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h106
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp49
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp13
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h93
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h50
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h46
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h6
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp2
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp19
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h5
-rw-r--r--native/jni/src/suggest/core/suggest.cpp34
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_header.h)41
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp108
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h76
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h77
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/byte_array_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h (renamed from native/jni/src/suggest/core/dictionary/byte_array_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp)21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h)15
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
50 files changed, 828 insertions, 785 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 4ff4bc2e4..150eb6762 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -21,7 +21,6 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_proximity_filter.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@@ -33,17 +32,17 @@ namespace latinime {
// Node initialization utils //
///////////////////////////////
-/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+/* static */ void DicNodeUtils::initAsRoot(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
- prevWordNodePos);
+ newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
+ prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
@@ -67,12 +66,13 @@ namespace latinime {
}
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) {
- getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes);
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ DicNodeVector *childDicNodes) {
+ getProximityChildDicNodes(dicNode, dictionaryStructurePolicy, 0, 0, false, childDicNodes);
}
/* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
DicNodeVector *childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
@@ -82,7 +82,7 @@ namespace latinime {
if (!dicNode->isLeavingNode()) {
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else {
- binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
+ dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode,
&childrenFilter, childDicNodes);
}
}
@@ -94,12 +94,13 @@ namespace latinime {
* Computes the combined bigram / unigram cost for the given dicNode.
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap);
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
/ static_cast<float>(MAX_PROBABILITY);
@@ -107,7 +108,7 @@ namespace latinime {
}
/* static */ int DicNodeUtils::getBigramNodeProbability(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
const int unigramProbability = node->getProbability();
const int wordPos = node->getPos();
@@ -118,8 +119,8 @@ namespace latinime {
return ProbabilityUtils::backoff(unigramProbability);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(),
- prevWordPos, wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
+ wordPos, unigramProbability);
}
return ProbabilityUtils::backoff(unigramProbability);
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 4f12b29f4..8dc984fe1 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -23,10 +23,10 @@
namespace latinime {
-class BinaryDictionaryInfo;
class DicNode;
class DicNodeProximityFilter;
class DicNodeVector;
+class DictionaryStructureWithBufferPolicy;
class ProximityInfoState;
class MultiBigramMap;
@@ -34,18 +34,22 @@ class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
const int16_t length1, int *dest);
- static void initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static void initAsRoot(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *newRootNode);
- static void initAsRootWithPreviousWord(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static void initAsRootWithPreviousWord(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *prevWordLastNode, DicNode *newRootNode);
static void initByCopy(DicNode *srcNode, DicNode *destNode);
static void getAllChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes);
- static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ DicNodeVector *childDicNodes);
+ static float getBigramNodeImprobability(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *const multiBigramMap);
// TODO: Move to private
static void getProximityChildDicNodes(DicNode *dicNode,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
DicNodeVector *childDicNodes);
@@ -54,7 +58,8 @@ class DicNodeUtils {
// Max number of bigrams to look up
static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
- static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ static int getBigramNodeProbability(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap);
static void createAndGetPassingChildNode(DicNode *dicNode,
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index fdaa562e5..ebe76467a 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -22,15 +22,16 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/probability_utils.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
-BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mBinaryDictionaryInfo(binaryDictionaryInfo) {
+BigramDictionary::BigramDictionary(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
+ : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
if (DEBUG_DICT) {
AKLOGI("BigramDictionary - constructor");
}
@@ -112,12 +113,11 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int bigramCount = 0;
int unigramProbability = 0;
int bigramBuffer[MAX_WORD_LENGTH];
- const DictionaryStructureWithBufferPolicy *const structurePolicy =
- mBinaryDictionaryInfo->getStructurePolicy();
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
+ BinaryDictionaryBigramsIterator bigramsIt(
+ mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- const int length = structurePolicy->
+ const int length = mDictionaryStructurePolicy->
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
// Due to space constraints, the probability for bigrams is approximate - the lower the
@@ -139,12 +139,10 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- const DictionaryStructureWithBufferPolicy *const structurePolicy =
- mBinaryDictionaryInfo->getStructurePolicy();
- int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+ int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
- return structurePolicy->getBigramsPositionOfNode(pos);
+ return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
}
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@@ -152,13 +150,12 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return false;
- const DictionaryStructureWithBufferPolicy *const structurePolicy =
- mBinaryDictionaryInfo->getStructurePolicy();
- int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1,
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
+ BinaryDictionaryBigramsIterator bigramsIt(
+ mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) {
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 438c34cac..99b964c49 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -21,11 +21,11 @@
namespace latinime {
-class BinaryDictionaryInfo;
+class DictionaryStructureWithBufferPolicy;
class BigramDictionary {
public:
- BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+ BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
int getPredictions(const int *word, int length, int *outBigramCodePoints,
int *outBigramProbability, int *outputTypes) const;
@@ -40,7 +40,7 @@ class BigramDictionary {
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
deleted file mode 100644
index 91c643a5f..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-
-namespace latinime {
-
-const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
- "MULTIPLE_WORDS_DEMOTION_RATE";
-const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
-const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
-
-BinaryDictionaryHeader::BinaryDictionaryHeader(
- const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mBinaryDictionaryInfo(binaryDictionaryInfo),
- mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)),
- mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)),
- mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
-
-float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
- const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
- mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
- if (headerValue == S_INT_MIN) {
- // not found
- return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
- }
- if (headerValue <= 0) {
- return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
- }
- return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
deleted file mode 100644
index a57b0f859..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
-
-#include <cctype>
-#include <cstdlib>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-
-namespace latinime {
-
-const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
-
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
- case HEADER_VERSION_2:
- // See the format of the header in the comment in
- // BinaryDictionaryFormatUtils::detectFormatVersion()
- return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
- + VERSION_2_HEADER_FLAG_SIZE);
- default:
- return S_INT_MAX;
- }
-}
-
-/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
- BinaryDictionaryHeaderReadingUtils::getFlags(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
- case HEADER_VERSION_2:
- return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
- default:
- return NO_FLAGS;
- }
-}
-
-// Returns if the key is found or not and reads the found value into outValue.
-/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const char *const key, int *outValue, const int outValueSize) {
- if (outValueSize <= 0) {
- return false;
- }
- const int headerSize = getHeaderSize(binaryDictionaryInfo);
- int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
- if (pos == NOT_A_DICT_POS) {
- // The header doesn't have header options.
- return false;
- }
- while (pos < headerSize) {
- if(ByteArrayUtils::compareStringInBufferWithCharArray(
- binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
- // The key was found.
- const int length = ByteArrayUtils::readStringAndAdvancePosition(
- binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
- // Add a 0 terminator to the string.
- outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
- return true;
- }
- ByteArrayUtils::advancePositionToBehindString(
- binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos);
- }
- // The key was not found.
- return false;
-}
-
-/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) {
- const int bufferSize = LARGEST_INT_DIGIT_COUNT;
- int intBuffer[bufferSize];
- char charBuffer[bufferSize];
- if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) {
- return S_INT_MIN;
- }
- for (int i = 0; i < bufferSize; ++i) {
- charBuffer[i] = intBuffer[i];
- if (charBuffer[i] == '0') {
- break;
- }
- if (!isdigit(charBuffer[i])) {
- // If not a number, return S_INT_MIN
- return S_INT_MIN;
- }
- }
- return atoi(charBuffer);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
deleted file mode 100644
index 61748227e..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H
-#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryInfo;
-
-class BinaryDictionaryHeaderReadingUtils {
- public:
- typedef uint16_t DictionaryFlags;
-
- static const int MAX_OPTION_KEY_LENGTH;
-
- static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo);
-
- static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo);
-
- static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
- return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
- return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
- return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
- switch (getHeaderVersion(dictionaryFormat)) {
- case HEADER_VERSION_2:
- return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
- + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
- break;
- default:
- return NOT_A_DICT_POS;
- }
- }
-
- static bool readHeaderValue(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const char *const key, int *outValue, const int outValueSize);
-
- static int readHeaderValueInt(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
-
- enum HEADER_VERSION {
- HEADER_VERSION_2,
- UNKNOWN_HEADER_VERSION
- };
-
- static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
- static const int VERSION_2_HEADER_FLAG_SIZE;
- static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
-
- static const DictionaryFlags NO_FLAGS;
- // Flags for special processing
- // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
- // something very bad (like, the apocalypse) will happen. Please update both at the same time.
- static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
- static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
- static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
- static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
-
- static HEADER_VERSION getHeaderVersion(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
- switch(formatVersion) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- // Fall through
- case BinaryDictionaryFormatUtils::VERSION_3:
- return HEADER_VERSION_2;
- default:
- return UNKNOWN_HEADER_VERSION;
- }
- }
-};
-}
-#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index c694c6a3a..e50baae0b 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -20,32 +20,17 @@
#include <stdint.h>
#include "defines.h"
-#include "jni.h"
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
-#include "utils/log_utils.h"
namespace latinime {
class BinaryDictionaryInfo {
public:
- AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
+ AK_FORCE_INLINE BinaryDictionaryInfo(const uint8_t *const dictBuf,
const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
- mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
- mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
- mDictBuf, mDictSize)),
- mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
- // TODO: Remove.
- mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructurePolicy(this)) {
- logDictionaryInfo(env);
- }
+ mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable) {}
- ~BinaryDictionaryInfo() {
- delete mStructurePolicy;
- }
+ ~BinaryDictionaryInfo() {}
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -63,30 +48,12 @@ class BinaryDictionaryInfo {
return mDictBufOffset;
}
- AK_FORCE_INLINE const uint8_t *getDictRoot() const {
- return mDictRoot;
- }
-
- AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const {
- return mDictionaryFormat;
- }
-
- // TODO: Move to DictionaryStructurePolicy.
- AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
- return &mDictionaryHeader;
- }
-
AK_FORCE_INLINE bool isDynamicallyUpdatable() const {
// TODO: Support dynamic dictionary formats.
const bool isUpdatableDictionaryFormat = false;
return mIsUpdatable && isUpdatableDictionaryFormat;
}
- // TODO: remove
- AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const {
- return mStructurePolicy;
- }
-
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
@@ -95,40 +62,6 @@ class BinaryDictionaryInfo {
const int mMmapFd;
const int mDictBufOffset;
const bool mIsUpdatable;
- const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
- // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the
- // DictionaryStructurePolicy.
- const BinaryDictionaryHeader mDictionaryHeader;
- const uint8_t *const mDictRoot;
- // TODO: remove
- const DictionaryStructureWithBufferPolicy *const mStructurePolicy;
-
- AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
- const int BUFFER_SIZE = 16;
- int dictionaryIdCodePointBuffer[BUFFER_SIZE];
- int versionStringCodePointBuffer[BUFFER_SIZE];
- int dateStringCodePointBuffer[BUFFER_SIZE];
- mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary",
- dictionaryIdCodePointBuffer, BUFFER_SIZE);
- mDictionaryHeader.readHeaderValueOrQuestionMark("version",
- versionStringCodePointBuffer, BUFFER_SIZE);
- mDictionaryHeader.readHeaderValueOrQuestionMark("date",
- dateStringCodePointBuffer, BUFFER_SIZE);
-
- char dictionaryIdCharBuffer[BUFFER_SIZE];
- char versionStringCharBuffer[BUFFER_SIZE];
- char dateStringCharBuffer[BUFFER_SIZE];
- intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
- dictionaryIdCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
- versionStringCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
- dateStringCharBuffer, BUFFER_SIZE);
-
- LogUtils::logToJava(env,
- "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
- dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize);
- }
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
new file mode 100644
index 000000000..558e0a5c3
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+#define LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator {
+ public:
+ BinaryDictionaryShortcutIterator(
+ const DictionaryShortcutsStructurePolicy *const shortcutStructurePolicy,
+ const int shortcutPos)
+ : mShortcutStructurePolicy(shortcutStructurePolicy),
+ mPos(shortcutStructurePolicy->getStartPos(shortcutPos)),
+ mHasNextShortcutTarget(shortcutPos != NOT_A_DICT_POS) {}
+
+ AK_FORCE_INLINE bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as an int string and put it to outTarget, put its length
+ // to outTargetLength, put whether it is whitelist to outIsWhitelist.
+ AK_FORCE_INLINE void nextShortcutTarget(
+ const int maxDepth, int *const outTarget, int *const outTargetLength,
+ bool *const outIsWhitelist) {
+ mShortcutStructurePolicy->getNextShortcut(maxDepth, outTarget, outTargetLength,
+ outIsWhitelist, &mHasNextShortcutTarget, &mPos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryShortcutIterator);
+
+ const DictionaryShortcutsStructurePolicy *const mShortcutStructurePolicy;
+ int mPos;
+ bool mHasNextShortcutTarget;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
deleted file mode 100644
index cb73a577e..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils;
-
-const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-// Flag for presence of more attributes
-const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
-// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
-// The numeric value of the shortcut probability that means 'whitelist'.
-const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
deleted file mode 100644
index c28f43ff7..000000000
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryTerminalAttributesReadingUtils {
- public:
- typedef uint8_t TerminalAttributeFlags;
- typedef TerminalAttributeFlags ShortcutFlags;
-
- static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
- const uint8_t *const dictRoot, int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
- }
-
- static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
- return flags & MASK_ATTRIBUTE_PROBABILITY;
- }
-
- static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) {
- return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
- }
-
- // Shortcuts reading methods
- // This method returns the size of the shortcut list region excluding the shortcut list size
- // field at the beginning.
- static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
- return ByteArrayUtils::readUint16AndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
- }
-
- static AK_FORCE_INLINE void skipShortcuts(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- const int shortcutListSize = getShortcutListSizeAndForwardPointer(
- binaryDictionaryInfo, pos);
- *pos += shortcutListSize;
- }
-
- static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
- return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
- }
-
- static AK_FORCE_INLINE int readShortcutTarget(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
- int *const outWord, int *const pos) {
- return ByteArrayUtils::readStringAndAdvancePosition(
- binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
-
- static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
- static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
- static const int ATTRIBUTE_ADDRESS_SHIFT;
- static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
- static const int WHITELIST_SHORTCUT_PROBABILITY;
-
- static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
- return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
- }
-
- static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) {
- return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
- /* Note: this is a value-dependant optimization of what may probably be
- more readably written this way:
- switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
- default: return 0;
- }
- */
- }
-};
-}
-#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 891b80331..af00e9927 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -18,33 +18,39 @@
#include "suggest/core/dictionary/dictionary.h"
-#include <map> // TODO: remove
#include <stdint.h>
#include "defines.h"
-#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
+#include "utils/log_utils.h"
namespace latinime {
Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
int dictBufOffset, bool isUpdatable)
- : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
+ : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable),
- mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
+ mDictionaryStructureWithBufferPolicy(DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(
+ static_cast<const uint8_t *>(dict), dictSize)),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
+ logDictionaryInfo(env);
}
Dictionary::~Dictionary() {
delete mBigramDictionary;
delete mGestureSuggest;
delete mTypingSuggest;
+ delete mDictionaryStructureWithBufferPolicy;
}
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
@@ -83,14 +89,12 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
}
int Dictionary::getProbability(const int *word, int length) const {
- const DictionaryStructureWithBufferPolicy *const structurePolicy =
- mBinaryDictionaryInfo.getStructurePolicy();
- int pos = structurePolicy->getTerminalNodePositionOfWord(word, length,
+ int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == pos) {
return NOT_A_PROBABILITY;
}
- return structurePolicy->getUnigramProbability(pos);
+ return getDictionaryStructurePolicy()->getUnigramProbability(pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
@@ -126,4 +130,33 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0,
// TODO: Support dynamic update
}
+void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
+ const int BUFFER_SIZE = 16;
+ int dictionaryIdCodePointBuffer[BUFFER_SIZE];
+ int versionStringCodePointBuffer[BUFFER_SIZE];
+ int dateStringCodePointBuffer[BUFFER_SIZE];
+ const DictionaryHeaderStructurePolicy *const headerPolicy =
+ getDictionaryStructurePolicy()->getHeaderStructurePolicy();
+ headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
+ BUFFER_SIZE);
+ headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
+ BUFFER_SIZE);
+ headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE);
+
+ char dictionaryIdCharBuffer[BUFFER_SIZE];
+ char versionStringCharBuffer[BUFFER_SIZE];
+ char dateStringCharBuffer[BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
+ dictionaryIdCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
+ versionStringCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
+ dateStringCharBuffer, BUFFER_SIZE);
+
+ LogUtils::logToJava(env,
+ "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
+ dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer,
+ mBinaryDictionaryInfo.getDictSize());
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 9f1e0729d..17ce47974 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -26,6 +26,7 @@
namespace latinime {
class BigramDictionary;
+class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
class SuggestInterface;
@@ -77,19 +78,27 @@ class Dictionary {
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
+ // TODO: Remove.
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const {
return &mBinaryDictionaryInfo;
}
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
+ return mDictionaryStructureWithBufferPolicy;
+ }
+
virtual ~Dictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
const BinaryDictionaryInfo mBinaryDictionaryInfo;
+ DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
const BigramDictionary *mBigramDictionary;
SuggestInterface *mGestureSuggest;
SuggestInterface *mTypingSuggest;
+
+ void logDictionaryInfo(JNIEnv *const env) const;
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index af378b1b7..3271c1bfb 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -19,7 +19,7 @@
#include <cstdlib>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -35,8 +35,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
{ DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
- const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) {
- const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header);
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
+ const int compositeGlyphCodePoint) {
+ const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy);
if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
return true;
}
@@ -45,11 +46,11 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
// Returns the digraph type associated with the given dictionary.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
- const BinaryDictionaryHeader *const header) {
- if (header->requiresGermanUmlautProcessing()) {
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ if (headerPolicy->requiresGermanUmlautProcessing()) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
- if (header->requiresFrenchLigatureProcessing()) {
+ if (headerPolicy->requiresFrenchLigatureProcessing()) {
return DIGRAPH_TYPE_FRENCH_LIGATURES;
}
return DIGRAPH_TYPE_NONE;
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index 9d74fe3a6..6ae16e390 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -21,7 +21,7 @@
namespace latinime {
-class BinaryDictionaryHeader;
+class DictionaryHeaderStructurePolicy;
class DigraphUtils {
public:
@@ -39,14 +39,15 @@ class DigraphUtils {
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
- static bool hasDigraphForCodePoint(
- const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint);
+ static bool hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy *const headerPolicy,
+ const int compositeGlyphCodePoint);
static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
const DigraphCodePointIndex digraphCodePointIndex);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
- static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header);
+ static DigraphType getDigraphTypeForDictionary(
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
static int getAllDigraphsForDigraphTypeAndReturnSize(
const DigraphType digraphType, const digraph_t **const digraphs);
static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint);
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 3c2180937..461d7b454 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -19,21 +19,20 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/terminal_attributes.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
namespace latinime {
class ShortcutUtils {
public:
- static int outputShortcuts(const TerminalAttributes *const terminalAttributes,
+ static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
int outputWordIndex, const int finalScore, int *const outputCodePoints,
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
- TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
int shortcutTarget[MAX_WORD_LENGTH];
- while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
+ while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
bool isWhilelist;
int shortcutTargetStringLength;
- iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+ shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetStringLength, &isWhilelist);
int shortcutScore;
int kind;
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
deleted file mode 100644
index c40a3bb59..000000000
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
-#define LATINIME_TERMINAL_ATTRIBUTES_H
-
-#include <stdint.h>
-
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-
-namespace latinime {
-
-/**
- * This class encapsulates information about a terminal that allows to
- * retrieve local node attributes like the list of shortcuts without
- * exposing the format structure to the client.
- */
-class TerminalAttributes {
- public:
- class ShortcutIterator {
- public:
- ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int shortcutPos, const bool hasShortcutList)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
- mHasNextShortcutTarget(hasShortcutList) {}
-
- inline bool hasNextShortcutTarget() const {
- return mHasNextShortcutTarget;
- }
-
- // Gets the shortcut target itself as an int string and put it to outTarget, put its length
- // to outTargetLength, put whether it is whitelist to outIsWhitelist.
- AK_FORCE_INLINE void nextShortcutTarget(
- const int maxDepth, int *const outTarget, int *const outTargetLength,
- bool *const outIsWhitelist) {
- const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
- BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
- mBinaryDictionaryInfo->getDictRoot(), &mPos);
- mHasNextShortcutTarget =
- BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
- if (outIsWhitelist) {
- *outIsWhitelist =
- BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
- }
- if (outTargetLength) {
- *outTargetLength =
- BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
- mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
- }
- }
-
- private:
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- int mPos;
- bool mHasNextShortcutTarget;
- };
-
- TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const int shortcutPos)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {}
-
- inline ShortcutIterator getShortcutIterator() const {
- int shortcutPos = mShortcutListSizePos;
- const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS;
- if (hasShortcutList) {
- BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
- mBinaryDictionaryInfo, &shortcutPos);
- }
- // shortcutPos is never used if hasShortcutList is false.
- return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const int mShortcutListSizePos;
-};
-} // namespace latinime
-#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
new file mode 100644
index 000000000..a6829b476
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts structure of dictionaries.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryHeaderStructurePolicy {
+ public:
+ virtual ~DictionaryHeaderStructurePolicy() {}
+
+ virtual bool supportsDynamicUpdate() const = 0;
+
+ virtual bool requiresGermanUmlautProcessing() const = 0;
+
+ virtual bool requiresFrenchLigatureProcessing() const = 0;
+
+ virtual float getMultiWordCostMultiplier() const = 0;
+
+ virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+ int outValueSize) const = 0;
+
+ protected:
+ DictionaryHeaderStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryHeaderStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h
new file mode 100644
index 000000000..40b6c2de1
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts structure of shortcuts.
+ */
+class DictionaryShortcutsStructurePolicy {
+ public:
+ virtual ~DictionaryShortcutsStructurePolicy() {}
+
+ virtual int getStartPos(const int pos) const = 0;
+
+ virtual void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const = 0;
+
+ virtual void skipAllShortcuts(int *const pos) const = 0;
+
+ protected:
+ DictionaryShortcutsStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryShortcutsStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index d83d1e390..1b34f03f0 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -24,6 +24,8 @@ namespace latinime {
class DicNode;
class DicNodeVector;
class DictionaryBigramsStructurePolicy;
+class DictionaryHeaderStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
/*
* This class abstracts structure of dictionaries.
@@ -64,8 +66,12 @@ class DictionaryStructureWithBufferPolicy {
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
+ virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
+
virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
+ virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 58729229f..f9b777df2 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -148,7 +148,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
case CT_TERMINAL: {
const float languageImprobability =
DicNodeUtils::getBigramNodeImprobability(
- traverseSession->getBinaryDictionaryInfo(), dicNode, multiBigramMap);
+ traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap);
return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability);
}
case CT_TERMINAL_INSERTION:
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 11a147bda..0ca583f90 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -17,31 +17,29 @@
#include "suggest/core/session/dic_traverse_session.h"
#include "defines.h"
-#include "jni.h"
-#include "suggest/core/dictionary/binary_dictionary_header.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
- const BinaryDictionaryInfo *const binaryDictionaryInfo =
- mDictionary->getBinaryDictionaryInfo();
- mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier();
+ mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
+ ->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
mPrevWordPos = NOT_A_VALID_WORD_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
@@ -56,8 +54,9 @@ void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
maxSpatialDistance, maxPointerCount);
}
-const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const {
- return mDictionary->getBinaryDictionaryInfo();
+const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy()
+ const {
+ return mDictionary->getDictionaryStructurePolicy();
}
void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) {
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 5c4cef02d..23de5cc65 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -28,8 +28,8 @@
namespace latinime {
-class BinaryDictionaryInfo;
class Dictionary;
+class DictionaryStructureWithBufferPolicy;
class ProximityInfo;
class SuggestOptions;
@@ -75,8 +75,7 @@ class DicTraverseSession {
const int maxPointerCount);
void resetCache(const int nextActiveCacheSize, const int maxWords);
- // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo.
- const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const;
//--------------------
// getters and setters
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index f28efd526..3b77227a0 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -19,12 +19,12 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/shortcut_utils.h"
-#include "suggest/core/dictionary/terminal_attributes.h"
#include "suggest/core/layout/proximity_info.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
@@ -107,7 +107,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
MAX_RESULTS);
// Create a new dic node here
DicNode rootNode;
- DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(),
+ DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
traverseSession->getPrevWordPos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
@@ -211,15 +211,14 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
}
if (!terminalDicNode->hasMultipleWords()) {
- const BinaryDictionaryInfo *const binaryDictionaryInfo =
- traverseSession->getBinaryDictionaryInfo();
- const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
- binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
- terminalDicNode->getPos()));
+ BinaryDictionaryShortcutIterator shortcutIt(
+ traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getShortcutPositionOfNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
+ outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex,
finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
}
DicNode::managedDelete(terminalDicNode);
@@ -298,7 +297,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
DicNodeUtils::getAllChildDicNodes(
- &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes);
+ &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
const int childDicNodesSize = childDicNodes.getSizeAndLock();
for (int i = 0; i < childDicNodesSize; ++i) {
@@ -309,7 +308,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
continue;
}
if (DigraphUtils::hasDigraphForCodePoint(
- traverseSession->getBinaryDictionaryInfo()->getHeader(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getHeaderStructurePolicy(),
childDicNode->getNodeCodePoint())) {
correctionDicNode.initByCopy(childDicNode);
correctionDicNode.advanceDigraphIndex();
@@ -447,7 +447,7 @@ void Suggest::processDicNodeAsOmission(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
DicNodeVector childDicNodes;
DicNodeUtils::getAllChildDicNodes(
- dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes);
+ dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
const int size = childDicNodes.getSizeAndLock();
for (int i = 0; i < size; i++) {
@@ -472,7 +472,8 @@ void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
DicNode *dicNode) const {
const int16_t pointIndex = dicNode->getInputIndex(0);
DicNodeVector childDicNodes;
- DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(),
+ DicNodeUtils::getProximityChildDicNodes(dicNode,
+ traverseSession->getDictionaryStructurePolicy(),
traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes);
const int size = childDicNodes.getSizeAndLock();
for (int i = 0; i < size; i++) {
@@ -490,14 +491,15 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
DicNode *dicNode) const {
const int16_t pointIndex = dicNode->getInputIndex(0);
DicNodeVector childDicNodes1;
- DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(),
+ DicNodeUtils::getProximityChildDicNodes(dicNode,
+ traverseSession->getDictionaryStructurePolicy(),
traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1);
const int childSize1 = childDicNodes1.getSizeAndLock();
for (int i = 0; i < childSize1; i++) {
if (childDicNodes1[i]->hasChildren()) {
DicNodeVector childDicNodes2;
DicNodeUtils::getProximityChildDicNodes(
- childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(),
+ childDicNodes1[i], traverseSession->getDictionaryStructurePolicy(),
traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2);
const int childSize2 = childDicNodes2.getSizeAndLock();
for (int j = 0; j < childSize2; j++) {
@@ -538,7 +540,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode
// Create a non-cached node here.
DicNode newDicNode;
DicNodeUtils::initAsRootWithPreviousWord(
- traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode);
+ traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
const CorrectionType correctionType = spaceSubstitution ?
CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION;
Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp
index 6f4fcbfc2..6da0e8b85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h
index 6b2bfe8c9..d0c584bd5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
index f2c586245..34f092f49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -16,23 +16,23 @@
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+#include <stdint.h>
+
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructurePolicy(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
- binaryDictionaryInfo);
- case BinaryDictionaryFormatUtils::VERSION_3:
- return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
- binaryDictionaryInfo);
+ ::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf,
+ const int dictSize) {
+ switch (FormatUtils::detectFormatVersion(dictBuf, dictSize)) {
+ case FormatUtils::VERSION_2:
+ return new PatriciaTriePolicy(dictBuf);
+ case FormatUtils::VERSION_3:
+ return new DynamicPatriciaTriePolicy(dictBuf);
default:
ASSERT(false);
return 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
index 95f82aabe..53eb8f927 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -17,18 +17,17 @@
#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
-#include "defines.h"
+#include <stdint.h>
+#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
-class BinaryDictionaryInfo;
-
class DictionaryStructureWithBufferPolicyFactory {
public:
- static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy(
- const BinaryDictionaryInfo *const binaryDictionaryInfo);
+ static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
+ const uint8_t *const dictBuf, const int dictSize);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index a6214d3e4..77a85c86d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -16,42 +16,40 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
- const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot();
int pos = nodePos;
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos);
+ DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos);
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos);
+ mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos);
} else {
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictRoot, mFlags, MAX_WORD_LENGTH, &pos);
+ mDictRoot, mFlags, MAX_WORD_LENGTH, &pos);
}
if (isTerminal()) {
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
} else {
mProbability = NOT_A_PROBABILITY;
}
if (hasChildren()) {
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictRoot, mFlags, &pos);
+ mDictRoot, mFlags, &pos);
} else {
mChildrenPos = NOT_A_DICT_POS;
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
mShortcutPos = pos;
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
+ mShortcutsPolicy->skipAllShortcuts(&pos);
} else {
mShortcutPos = NOT_A_DICT_POS;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index 4f22db738..e990809e8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -25,8 +25,8 @@
namespace latinime {
-class BinaryDictionaryInfo;
class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
/*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@@ -34,13 +34,14 @@ class DictionaryBigramsStructurePolicy;
*/
class DynamicPatriciaTrieNodeReader {
public:
- DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy)
- : mBinaryDictionaryInfo(binaryDictionaryInfo), mBigramsPolicy(bigramsPolicy),
- mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
- mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_VALID_WORD_POS) {}
+ DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
+ mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+ mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
~DynamicPatriciaTrieNodeReader() {}
@@ -120,8 +121,10 @@ class DynamicPatriciaTrieNodeReader {
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ // TODO: Consolidate mDictRoot.
+ const uint8_t *const mDictRoot;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 9b384157e..7d3b2e28e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -19,7 +19,6 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
@@ -34,7 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0;
@@ -79,7 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0;
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
// First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints);
@@ -123,7 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0;
int pos = getRootPosition();
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
while (currentLength <= length) {
// When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false;
@@ -194,7 +196,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
@@ -206,7 +209,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
@@ -218,7 +222,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
+ getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 1e94140c4..56475b137 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -22,19 +22,19 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
namespace latinime {
-class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
- const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
- mBigramListPolicy(dictRoot) {}
+ DynamicPatriciaTriePolicy(const uint8_t *const dictBuf)
+ : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
~DynamicPatriciaTriePolicy() {}
@@ -58,18 +58,27 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getBigramsPositionOfNode(const int nodePos) const;
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+ const HeaderPolicy mHeaderPolicy;
+ // TODO: Consolidate mDictRoot.
const uint8_t *const mDictRoot;
- // TODO: remove
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const BigramListPolicy mBigramListPolicy;
+ const ShortcutListPolicy mShortcutListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 0de6341b0..1ef3b65c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -17,7 +17,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index 5398d7e37..a6cb46d39 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
new file mode 100644
index 000000000..eb828b58c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+
+namespace latinime {
+
+const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
+ "MULTIPLE_WORDS_DEMOTION_RATE";
+const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
+const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+
+float HeaderPolicy::readMultiWordCostMultiplier() const {
+ const int headerValue = HeaderReadingUtils::readHeaderValueInt(
+ mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
+ if (headerValue == S_INT_MIN) {
+ // not found
+ return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
+ }
+ if (headerValue <= 0) {
+ return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ }
+ return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 240512bce..e3e6fc077 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -14,38 +14,40 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H
-#define LATINIME_BINARY_DICTIONARY_HEADER_H
+#ifndef LATINIME_HEADER_POLICY_H
+#define LATINIME_HEADER_POLICY_H
+
+#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
namespace latinime {
-class BinaryDictionaryInfo;
-
-/**
- * This class abstracts dictionary header structures and provide interface to access dictionary
- * header information.
- */
-class BinaryDictionaryHeader {
+class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
- explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+ explicit HeaderPolicy(const uint8_t *const dictBuf)
+ : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)),
+ mSize(HeaderReadingUtils::getHeaderSize(dictBuf)),
+ mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
+
+ ~HeaderPolicy() {}
AK_FORCE_INLINE int getSize() const {
return mSize;
}
AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return BinaryDictionaryHeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
+ return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return BinaryDictionaryHeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return BinaryDictionaryHeaderReadingUtils::requiresFrenchLigatureProcessing(
+ return HeaderReadingUtils::requiresFrenchLigatureProcessing(
mDictionaryFlags);
}
@@ -60,7 +62,7 @@ class BinaryDictionaryHeader {
outValue[0] = '\0';
return;
}
- if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
+ if (!HeaderReadingUtils::readHeaderValue(mDictBuf,
key, outValue, outValueSize)) {
outValue[0] = '?';
outValue[1] = '\0';
@@ -68,18 +70,19 @@ class BinaryDictionaryHeader {
}
private:
- DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER;
static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags;
+ const uint8_t *const mDictBuf;
+ const HeaderReadingUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
const float mMultiWordCostMultiplier;
float readMultiWordCostMultiplier() const;
};
+
} // namespace latinime
-#endif // LATINIME_BINARY_DICTIONARY_HEADER_H
+#endif /* LATINIME_HEADER_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
new file mode 100644
index 000000000..23b88ecdf
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
+
+#include <cctype>
+#include <cstdlib>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
+
+const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
+const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2;
+const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4;
+
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) {
+ // See the format of the header in the comment in
+ // BinaryDictionaryFormatUtils::detectFormatVersion()
+ return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
+ + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
+}
+
+/* static */ HeaderReadingUtils::DictionaryFlags
+ HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) {
+ return ByteArrayUtils::readUint16(dictBuf,
+ HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
+}
+
+// Returns if the key is found or not and reads the found value into outValue.
+/* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf,
+ const char *const key, int *outValue, const int outValueSize) {
+ if (outValueSize <= 0) {
+ return false;
+ }
+ const int headerSize = getHeaderSize(dictBuf);
+ int pos = getHeaderOptionsPosition();
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return false;
+ }
+ while (pos < headerSize) {
+ if(ByteArrayUtils::compareStringInBufferWithCharArray(
+ dictBuf, key, headerSize - pos, &pos) == 0) {
+ // The key was found.
+ const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize,
+ outValue, &pos);
+ // Add a 0 terminator to the string.
+ outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
+ return true;
+ }
+ ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos);
+ }
+ // The key was not found.
+ return false;
+}
+
+/* static */ int HeaderReadingUtils::readHeaderValueInt(
+ const uint8_t *const dictBuf, const char *const key) {
+ const int bufferSize = LARGEST_INT_DIGIT_COUNT;
+ int intBuffer[bufferSize];
+ char charBuffer[bufferSize];
+ if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) {
+ return S_INT_MIN;
+ }
+ for (int i = 0; i < bufferSize; ++i) {
+ charBuffer[i] = intBuffer[i];
+ if (charBuffer[i] == '0') {
+ break;
+ }
+ if (!isdigit(charBuffer[i])) {
+ // If not a number, return S_INT_MIN
+ return S_INT_MIN;
+ }
+ }
+ return atoi(charBuffer);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
new file mode 100644
index 000000000..c94919640
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_READING_UTILS_H
+#define LATINIME_HEADER_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class HeaderReadingUtils {
+ public:
+ typedef uint16_t DictionaryFlags;
+
+ static const int MAX_OPTION_KEY_LENGTH;
+
+ static int getHeaderSize(const uint8_t *const dictBuf);
+
+ static DictionaryFlags getFlags(const uint8_t *const dictBuf);
+
+ static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
+ return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
+ return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
+ return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE int getHeaderOptionsPosition() {
+ return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ + HEADER_SIZE_FIELD_SIZE;
+ }
+
+ static bool readHeaderValue(const uint8_t *const dictBuf,
+ const char *const key, int *outValue, const int outValueSize);
+
+ static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils);
+
+ static const int HEADER_MAGIC_NUMBER_SIZE;
+ static const int HEADER_DICTIONARY_VERSION_SIZE;
+ static const int HEADER_FLAG_SIZE;
+ static const int HEADER_SIZE_FIELD_SIZE;
+
+ static const DictionaryFlags NO_FLAGS;
+ // Flags for special processing
+ // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
+ // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+ static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
+ static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
+ static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
+ static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+};
+}
+#endif /* LATINIME_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index 7001509ab..8ce2b3ea0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -20,8 +20,6 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/policyimpl/dictionary/binary_format.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
@@ -112,7 +110,7 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
+ mShortcutListPolicy.skipAllShortcuts(&pos);;
}
return pos;
}
@@ -133,7 +131,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
mDictRoot, flags, &pos) : NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
+ getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
getBigramsStructurePolicy()->skipAllBigrams(&pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 2f81aedb8..bebe1bfff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -22,19 +22,19 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
namespace latinime {
-class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const uint8_t *const dictRoot,
- const BinaryDictionaryInfo *const binaryDictionaryInfo)
- : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
- mBigramListPolicy(dictRoot) {}
+ PatriciaTriePolicy(const uint8_t *const dictBuf)
+ : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
~PatriciaTriePolicy() {}
@@ -58,17 +58,25 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getBigramsPositionOfNode(const int nodePos) const;
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
+ const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
- // TODO: remove
- const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const BigramListPolicy mBigramListPolicy;
+ const ShortcutListPolicy mShortcutListPolicy;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 89e981df8..003b9435b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -17,7 +17,7 @@
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
index 002c3f19b..9f2fc2059 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
new file mode 100644
index 000000000..d73f73953
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_POLICY_H
+#define LATINIME_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ explicit ShortcutListPolicy(const uint8_t *const shortcutBuf)
+ : mShortcutsBuf(shortcutBuf) {}
+
+ ~ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int listPos = pos;
+ ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos);
+ return listPos;
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ mShortcutsBuf, maxCodePointCount, outCodePoint, pos);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
+ *pos += shortcutListSize;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy);
+
+ const uint8_t *const mShortcutsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
new file mode 100644
index 000000000..e70bb5071
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+// Flag for presence of more attributes
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
+// The numeric value of the shortcut probability that means 'whitelist'.
+const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
new file mode 100644
index 000000000..b5bb964d5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H
+#define LATINIME_SHORTCUT_LIST_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+class ShortcutListReadingUtils {
+ public:
+ typedef uint8_t ShortcutFlags;
+
+ static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
+ }
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // This method returns the size of the shortcut list region excluding the shortcut list size
+ // field at the beginning.
+ static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
+ return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos)
+ - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) {
+ const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos);
+ *pos += shortcutListSize;
+ }
+
+ static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+ return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE int readShortcutTarget(
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord,
+ int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils);
+
+ static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ static const int WHITELIST_SHORTCUT_PROBABILITY;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
index 68b1d5d15..a84cfb9d5 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 75ccfc766..75ccfc766 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 0e8d72f2e..3796c7b7b 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -14,7 +14,9 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -22,22 +24,19 @@ namespace latinime {
* Dictionary size
*/
// Any file smaller than this is not a dictionary.
-const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
/**
* Format versions
*/
-
-// The versions of Latin IME that only handle format version 1 only test for the magic
-// number, so we had to change it so that version 2 files would be rejected by older
-// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported
+// or obsolete dictionary formats.
+const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
+const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
-/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
- BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
- const int dictSize) {
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
+ const uint8_t *const dict, const int dictSize) {
// The magic number is stored big-endian.
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
// understand this format.
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 830684c70..f84321577 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -14,24 +14,19 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
+#ifndef LATINIME_FORMAT_UTILS_H
+#define LATINIME_FORMAT_UTILS_H
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime {
/**
* Methods to handle binary dictionary format version.
- *
- * Currently, we have a file with a similar name, binary_format.h. binary_format.h contains binary
- * reading methods and utility methods for various purposes.
- * On the other hand, this file deals with only about dictionary format version.
*/
-class BinaryDictionaryFormatUtils {
+class FormatUtils {
public:
enum FORMAT_VERSION {
VERSION_2,
@@ -42,11 +37,11 @@ class BinaryDictionaryFormatUtils {
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
-#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
+#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 7cddb0882..b6aa85896 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -155,7 +155,8 @@ class TypingWeighting : public Weighting {
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode,
MultiBigramMap *const multiBigramMap) const {
- return DicNodeUtils::getBigramNodeImprobability(traverseSession->getBinaryDictionaryInfo(),
+ return DicNodeUtils::getBigramNodeImprobability(
+ traverseSession->getDictionaryStructurePolicy(),
dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}