aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroynagi <ksk@google.com>2013-06-10 20:20:13 -0700
committerAndroid Git Automerger <android-git-automerger@android.com>2013-06-10 20:20:13 -0700
commit1784d92e6f82abd47cb5dac0a523d23bf73f76f2 (patch)
tree02157dff17d49ff4dfcd505bd15f5f0ed1bbbdb2
parentf0bdb131aba520b6b1920bfc3e418c6b1f63eb26 (diff)
parente3338420f6a0c0491d55af0536123f71959d9f80 (diff)
downloadlatinime-1784d92e6f82abd47cb5dac0a523d23bf73f76f2.tar.gz
latinime-1784d92e6f82abd47cb5dac0a523d23bf73f76f2.tar.xz
latinime-1784d92e6f82abd47cb5dac0a523d23bf73f76f2.zip
am e3338420: Merge "Introduce BinaryDictionaryHeader to access binary dictionary header."
* commit 'e3338420f6a0c0491d55af0536123f71959d9f80': Introduce BinaryDictionaryHeader to access binary dictionary header.
-rw-r--r--native/jni/Android.mk4
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp2
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp)6
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_format.h)23
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp49
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.h70
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp121
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h102
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h18
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_format.h156
-rw-r--r--native/jni/src/suggest/core/dictionary/byte_array_utils.h39
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp11
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp24
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h11
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp10
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h1
-rw-r--r--native/jni/src/suggest/core/suggest.cpp4
18 files changed, 424 insertions, 233 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 7ca405752..9718cf5fb 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -55,7 +55,9 @@ LATIN_IME_CORE_SRC_FILES := \
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \
- binary_dictionary_format.cpp \
+ binary_dictionary_format_utils.cpp \
+ binary_dictionary_header.cpp \
+ binary_dictionary_header_reading_utils.cpp \
byte_array_utils.cpp \
dictionary.cpp \
digraph_utils.cpp) \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index f60793733..8490e32bc 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -27,7 +27,7 @@
#include "jni.h"
#include "jni_common.h"
#include "obsolete/correction.h"
-#include "suggest/core/dictionary/binary_dictionary_format.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
index 50e0211d7..737df63c7 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/binary_dictionary_format.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
namespace latinime {
@@ -31,7 +31,6 @@ const int BinaryDictionaryFormat::DICTIONARY_MINIMUM_SIZE = 4;
// then options that must be 0. Hence the first 32-bits of the format are always as follows
// and it's okay to consider them a magic number as a whole.
const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
-const int BinaryDictionaryFormat::FORMAT_VERSION_1_HEADER_SIZE = 5;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
@@ -39,9 +38,6 @@ const int BinaryDictionaryFormat::FORMAT_VERSION_1_HEADER_SIZE = 5;
const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
const int BinaryDictionaryFormat::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
-const int BinaryDictionaryFormat::VERSION_2_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryFormat::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryFormat::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
/* static */ BinaryDictionaryFormat::FORMAT_VERSION BinaryDictionaryFormat::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
index 3aa1662da..c0fd56111 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_H
-#define LATINIME_BINARY_DICTIONARY_FORMAT_H
+#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
+#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
#include <stdint.h>
@@ -42,30 +42,13 @@ class BinaryDictionaryFormat {
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
- static AK_FORCE_INLINE int getHeaderSize(
- const uint8_t *const dict, const FORMAT_VERSION format) {
- switch (format) {
- case VERSION_1:
- return FORMAT_VERSION_1_HEADER_SIZE;
- case VERSION_2:
- // See the format of the header in the comment in detectFormat() above
- return ByteArrayUtils::readUint32(dict, 8);
- default:
- return S_INT_MAX;
- }
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormat);
static const int DICTIONARY_MINIMUM_SIZE;
static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER;
- static const int FORMAT_VERSION_1_HEADER_SIZE;
static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
static const int FORMAT_VERSION_2_MINIMUM_SIZE;
- static const int VERSION_2_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_DICTIONARY_VERSION_SIZE ;
- static const int VERSION_2_DICTIONARY_FLAG_SIZE;
};
} // namespace latinime
-#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_H */
+#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
new file mode 100644
index 000000000..04bb81f71
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/binary_dictionary_header.h"
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+
+namespace latinime {
+
+const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
+ "MULTIPLE_WORDS_DEMOTION_RATE";
+const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
+const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+
+// Caches the flags, the header size and the multi-word cost multiplier read from the
+// dictionary described by binaryDictionaryInfo.
+BinaryDictionaryHeader::BinaryDictionaryHeader(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo)
+        : mBinaryDictionaryInfo(binaryDictionaryInfo),
+          mDictionaryFlags(BinaryDictionaryHeaderReader::getFlags(binaryDictionaryInfo)),
+          mSize(BinaryDictionaryHeaderReader::getHeaderSize(binaryDictionaryInfo)),
+          // readMultiWordCostMultiplier() reads mBinaryDictionaryInfo, which has already been
+          // initialized at this point.
+          mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
+
+// Derives the multi-word cost multiplier from the MULTIPLE_WORDS_DEMOTION_RATE header
+// attribute.
+float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
+    const int demotionRate = BinaryDictionaryHeaderReader::readHeaderValueInt(
+            mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
+    if (demotionRate > 0) {
+        // Regular case: the multiplier is the scale divided by the stored rate.
+        return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
+    }
+    // S_INT_MIN is what readHeaderValueInt() reports when the attribute could not be read;
+    // fall back to the default then. Any other non-positive rate gets the maximum weight.
+    return (demotionRate == S_INT_MIN) ? DEFAULT_MULTI_WORD_COST_MULTIPLIER
+            : static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
new file mode 100644
index 000000000..9db000362
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H
+#define LATINIME_BINARY_DICTIONARY_HEADER_H
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
+
+namespace latinime {
+
+class BinaryDictionaryInfo;
+
+/**
+ * Abstracts the structure of a binary dictionary header and provides an interface to access
+ * the information stored in it.
+ */
+class BinaryDictionaryHeader {
+ public:
+    explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+
+    // Header size, as read by BinaryDictionaryHeaderReader::getHeaderSize() at construction.
+    AK_FORCE_INLINE int getSize() const { return mSize; }
+
+    // The predicates below test the flags that were cached at construction time.
+    AK_FORCE_INLINE bool supportsDynamicUpdate() const {
+        return BinaryDictionaryHeaderReader::supportsDynamicUpdate(mDictionaryFlags);
+    }
+
+    AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+        return BinaryDictionaryHeaderReader::requiresGermanUmlautProcessing(mDictionaryFlags);
+    }
+
+    AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
+        return BinaryDictionaryHeaderReader::requiresFrenchLigatureProcessing(mDictionaryFlags);
+    }
+
+    // Multiplier computed once by readMultiWordCostMultiplier().
+    AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
+        return mMultiWordCostMultiplier;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
+
+    static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
+    static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER;
+    static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
+
+    float readMultiWordCostMultiplier() const;
+
+    // Keep mBinaryDictionaryInfo first: members are initialized in declaration order and
+    // readMultiWordCostMultiplier() reads it while mMultiWordCostMultiplier is initialized.
+    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+    const BinaryDictionaryHeaderReader::DictionaryFlags mDictionaryFlags;
+    const int mSize;
+    const float mMultiWordCostMultiplier;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_HEADER_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
new file mode 100644
index 000000000..c09a78f03
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
+
+#include <cctype>
+#include <cstdlib>
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+
+namespace latinime {
+
+const int BinaryDictionaryHeaderReader::MAX_OPTION_KEY_LENGTH = 256;
+
+const int BinaryDictionaryHeaderReader::FORMAT_VERSION_1_HEADER_SIZE = 5;
+
+const int BinaryDictionaryHeaderReader::VERSION_2_MAGIC_NUMBER_SIZE = 4;
+const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
+const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
+const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
+
+const BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const BinaryDictionaryHeaderReader::DictionaryFlags
+ BinaryDictionaryHeaderReader::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const BinaryDictionaryHeaderReader::DictionaryFlags
+ BinaryDictionaryHeaderReader::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const BinaryDictionaryHeaderReader::DictionaryFlags
+ BinaryDictionaryHeaderReader::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+// Returns the header size for the dictionary's format, or S_INT_MAX for an unknown format.
+/* static */ int BinaryDictionaryHeaderReader::getHeaderSize(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo) {
+    const BinaryDictionaryFormat::FORMAT_VERSION format = binaryDictionaryInfo->getFormat();
+    if (format == BinaryDictionaryFormat::VERSION_1) {
+        // Version 1 headers have a fixed size.
+        return FORMAT_VERSION_1_HEADER_SIZE;
+    }
+    if (format == BinaryDictionaryFormat::VERSION_2) {
+        // Version 2 stores the header size as a 32-bit field located right after the magic
+        // number, the dictionary version and the flags. See the format of the header in the
+        // comment in BinaryDictionaryFormat::detectFormatVersion().
+        const int headerSizeFieldPos = VERSION_2_MAGIC_NUMBER_SIZE
+                + VERSION_2_DICTIONARY_VERSION_SIZE + VERSION_2_DICTIONARY_FLAG_SIZE;
+        return ByteArrayUtils::readUint32(
+                binaryDictionaryInfo->getDictBuf(), headerSizeFieldPos);
+    }
+    // Unknown format.
+    return S_INT_MAX;
+}
+
+// Returns the flags stored in the header. Flags are only read from version 2 dictionaries;
+// every other format reports NO_FLAGS.
+/* static */ BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::getFlags(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo) {
+    if (binaryDictionaryInfo->getFormat() != BinaryDictionaryFormat::VERSION_2) {
+        return NO_FLAGS;
+    }
+    // The 16-bit flags field follows the magic number and the dictionary version.
+    const int flagsPos = VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE;
+    return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), flagsPos);
+}
+
+// Looks up key among the {key,value} string attributes of the header.
+// Returns whether the key was found. When it is, the code points of the value are copied
+// into outValue (which must have room for outValueSize entries) and a 0 terminator is
+// written after them; the value is truncated if needed to leave room for the terminator.
+// outValue is left untouched when the key is not found.
+/* static */ bool BinaryDictionaryHeaderReader::readHeaderValue(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo,
+        const char *const key, int *outValue, const int outValueSize) {
+    if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
+        return false;
+    }
+    const int headerSize = getHeaderSize(binaryDictionaryInfo);
+    int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
+    while (pos < headerSize) {
+        if (ByteArrayUtils::compareStringInBufferWithCharArray(
+                binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
+            // The key was found.
+            const int length = ByteArrayUtils::readStringAndAdvancePosition(
+                    binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
+            // readHeaderValueInt() scans the value up to a 0 terminator, so always put one,
+            // as the former BinaryFormat::readHeaderValue() did.
+            outValue[length < outValueSize ? length : outValueSize - 1] = 0;
+            return true;
+        }
+        // Not the key we are looking for: skip its value.
+        ByteArrayUtils::advancePositionToBehindString(
+                binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos);
+    }
+    // The key was not found.
+    return false;
+}
+
+// Reads the header attribute stored under key and parses it as a decimal number.
+// Returns S_INT_MIN when the key is not found, or when the value is empty or contains
+// anything other than the digits '0' to '9'.
+/* static */ int BinaryDictionaryHeaderReader::readHeaderValueInt(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) {
+    const int bufferSize = LARGEST_INT_DIGIT_COUNT;
+    // Zero-filled so that the scan below always meets a terminator, even if fewer than
+    // bufferSize code points were written without a terminator after them.
+    int intBuffer[bufferSize] = {};
+    // One extra slot so that the digits handed to atoi() are always terminated.
+    char charBuffer[bufferSize + 1];
+    if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) {
+        return S_INT_MIN;
+    }
+    int length = 0;
+    // Stop at the string terminator '\0' (not at the digit '0').
+    while (length < bufferSize && intBuffer[length] != '\0') {
+        const int codePoint = intBuffer[length];
+        // Test the code point itself: narrowing it to char first could make a non-ASCII
+        // code point look like a digit.
+        if (codePoint < '0' || codePoint > '9') {
+            // If not a number, return S_INT_MIN
+            return S_INT_MIN;
+        }
+        charBuffer[length++] = static_cast<char>(codePoint);
+    }
+    if (length == 0) {
+        // An empty value is not a number either.
+        return S_INT_MIN;
+    }
+    charBuffer[length] = '\0';
+    return atoi(charBuffer);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
new file mode 100644
index 000000000..6e9dca73c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H
+#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+
+namespace latinime {
+
+class BinaryDictionaryInfo;
+
+// Static helpers that read the fields of a binary dictionary header.
+class BinaryDictionaryHeaderReader {
+ public:
+    typedef uint16_t DictionaryFlags;
+
+    static const int MAX_OPTION_KEY_LENGTH;
+
+    // Returns the header size for the dictionary's format, or S_INT_MAX for unknown formats.
+    static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+
+    // Returns the flags stored in the header, or NO_FLAGS for formats without flags.
+    static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo);
+
+    static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
+        return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
+    }
+
+    static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
+        return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
+    }
+
+    static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
+        return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
+    }
+
+    static AK_FORCE_INLINE bool hasHeaderAttributes(
+            const BinaryDictionaryFormat::FORMAT_VERSION format) {
+        // Only format 2 and above have header attributes as {key,value} string pairs.
+        switch (format) {
+        case BinaryDictionaryFormat::VERSION_2:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    // Returns the position of the first header attribute, or 0 for formats that have none.
+    static AK_FORCE_INLINE int getHeaderOptionsPosition(
+            const BinaryDictionaryFormat::FORMAT_VERSION format) {
+        switch (format) {
+        case BinaryDictionaryFormat::VERSION_2:
+            // The attributes follow the magic number, version, flags and header size fields.
+            return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
+                    + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+        default:
+            return 0;
+        }
+    }
+
+    // Returns whether key was found among the header attributes; when it is, the code points
+    // of its value are copied into outValue (at most outValueSize entries).
+    static bool readHeaderValue(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo,
+            const char *const key, int *outValue, const int outValueSize);
+
+    // Returns the value stored under key parsed as an integer, or S_INT_MIN when the key is
+    // not found or the value is not a number.
+    static int readHeaderValueInt(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReader);
+
+    static const int FORMAT_VERSION_1_HEADER_SIZE;
+
+    static const int VERSION_2_MAGIC_NUMBER_SIZE;
+    static const int VERSION_2_DICTIONARY_VERSION_SIZE;
+    static const int VERSION_2_DICTIONARY_FLAG_SIZE;
+    static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+
+    static const DictionaryFlags NO_FLAGS;
+    // Flags for special processing
+    // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
+    // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+    static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
+    static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
+    static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
+    // NOTE(review): CONTAINS_BIGRAMS_FLAG is only declared; no definition accompanies it in
+    // binary_dictionary_header_reading_utils.cpp, so using it would fail to link. Define it
+    // or drop it.
+    static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+};
+}
+#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index 8508c6786..0b77e5ee9 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -20,16 +20,19 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_format.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_header.h"
namespace latinime {
+class BinaryDictionaryHeader;
+
class BinaryDictionaryInfo {
public:
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize)
: mDictBuf(dictBuf),
- mFormat(BinaryDictionaryFormat::detectFormatVersion(mDictBuf, dictSize)),
- mDictRoot(mDictBuf + BinaryDictionaryFormat::getHeaderSize(mDictBuf, mFormat)) {}
+ mDictionaryFormat(BinaryDictionaryFormat::detectFormatVersion(mDictBuf, dictSize)),
+ mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {}
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -40,18 +43,23 @@ class BinaryDictionaryInfo {
}
AK_FORCE_INLINE BinaryDictionaryFormat::FORMAT_VERSION getFormat() const {
- return mFormat;
+ return mDictionaryFormat;
}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
+ AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
+ return &mDictionaryHeader;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
const uint8_t *const mDictBuf;
- const BinaryDictionaryFormat::FORMAT_VERSION mFormat;
+ const BinaryDictionaryFormat::FORMAT_VERSION mDictionaryFormat;
+ const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
};
}
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index c82065f97..f580bdad5 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_BINARY_FORMAT_H
#define LATINIME_BINARY_FORMAT_H
-#include <cstdlib>
#include <stdint.h>
#include "suggest/core/dictionary/bloom_filter.h"
@@ -61,17 +60,9 @@ class BinaryFormat {
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static const int UNKNOWN_FORMAT = -1;
static const int SHORTCUT_LIST_SIZE_SIZE = 2;
- static int detectFormat(const uint8_t *const dict, const int dictSize);
- static int getHeaderSize(const uint8_t *const dict, const int dictSize);
- static int getFlags(const uint8_t *const dict, const int dictSize);
static bool hasBlacklistedOrNotAWordFlag(const int flags);
- static void readHeaderValue(const uint8_t *const dict, const int dictSize,
- const char *const key, int *outValue, const int outValueSize);
- static int readHeaderValueInt(const uint8_t *const dict, const int dictSize,
- const char *const key);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -93,20 +84,11 @@ class BinaryFormat {
int *outWord, int *outUnigramProbability);
static int getBigramProbabilityFromHashMap(const int position,
const hash_map_compat<int, int> *bigramMap, const int unigramProbability);
- static float getMultiWordCostMultiplier(const uint8_t *const dict, const int dictSize);
static void fillBigramProbabilityToHashMap(const uint8_t *const root, int position,
hash_map_compat<int, int> *bigramMap);
static int getBigramProbability(const uint8_t *const root, int position,
const int nextPosition, const int unigramProbability);
- // Flags for special processing
- // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
- // something very bad (like, the apocalypse) will happen. Please update both at the same time.
- enum {
- REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1,
- REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
- };
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
static int getBigramListPositionForWordPosition(const uint8_t *const root, int position);
@@ -119,20 +101,6 @@ class BinaryFormat {
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
- // Any file smaller than this is not a dictionary.
- static const int DICTIONARY_MINIMUM_SIZE = 4;
- // Originally, format version 1 had a 16-bit magic number, then the version number `01'
- // then options that must be 0. Hence the first 32-bits of the format are always as follow
- // and it's okay to consider them a magic number as a whole.
- static const int FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
- static const int FORMAT_VERSION_1_HEADER_SIZE = 5;
- // The versions of Latin IME that only handle format version 1 only test for the magic
- // number, so we had to change it so that version 2 files would be rejected by older
- // implementations. On this occasion, we made the magic number 32 bits long.
- static const int FORMAT_VERSION_2_MAGIC_NUMBER = -1681835266; // 0x9BC13AFE
- // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
- static const int FORMAT_VERSION_2_MINIMUM_SIZE = 12;
-
static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
static const int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
static const int CHARACTER_ARRAY_TERMINATOR = 0x1F;
@@ -142,122 +110,10 @@ class BinaryFormat {
static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
};
-AK_FORCE_INLINE int BinaryFormat::detectFormat(const uint8_t *const dict, const int dictSize) {
- // The magic number is stored big-endian.
- // If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
- // understand this format.
- if (dictSize < DICTIONARY_MINIMUM_SIZE) return UNKNOWN_FORMAT;
- const int magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
- switch (magicNumber) {
- case FORMAT_VERSION_1_MAGIC_NUMBER:
- // Format 1 header is exactly 5 bytes long and looks like:
- // Magic number (2 bytes) 0x78 0xB1
- // Version number (1 byte) 0x01
- // Options (2 bytes) must be 0x00 0x00
- return 1;
- case FORMAT_VERSION_2_MAGIC_NUMBER:
- // Version 2 dictionaries are at least 12 bytes long (see below details for the header).
- // If this dictionary has the version 2 magic number but is less than 12 bytes long, then
- // it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) return UNKNOWN_FORMAT;
- // Format 2 header is as follows:
- // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes) 0x00 0x02
- // Options (2 bytes)
- // Header size (4 bytes) : integer, big endian
- return (dict[4] << 8) + dict[5];
- default:
- return UNKNOWN_FORMAT;
- }
-}
-
-inline int BinaryFormat::getFlags(const uint8_t *const dict, const int dictSize) {
- switch (detectFormat(dict, dictSize)) {
- case 1:
- return NO_FLAGS; // TODO: NO_FLAGS is unused anywhere else?
- default:
- return (dict[6] << 8) + dict[7];
- }
-}
-
inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) {
return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0;
}
-inline int BinaryFormat::getHeaderSize(const uint8_t *const dict, const int dictSize) {
- switch (detectFormat(dict, dictSize)) {
- case 1:
- return FORMAT_VERSION_1_HEADER_SIZE;
- case 2:
- // See the format of the header in the comment in detectFormat() above
- return (dict[8] << 24) + (dict[9] << 16) + (dict[10] << 8) + dict[11];
- default:
- return S_INT_MAX;
- }
-}
-
-inline void BinaryFormat::readHeaderValue(const uint8_t *const dict, const int dictSize,
- const char *const key, int *outValue, const int outValueSize) {
- int outValueIndex = 0;
- // Only format 2 and above have header attributes as {key,value} string pairs. For prior
- // formats, we just return an empty string, as if the key wasn't found.
- if (2 <= detectFormat(dict, dictSize)) {
- const int headerOptionsOffset = 4 /* magic number */
- + 2 /* dictionary version */ + 2 /* flags */;
- const int headerSize =
- (dict[headerOptionsOffset] << 24) + (dict[headerOptionsOffset + 1] << 16)
- + (dict[headerOptionsOffset + 2] << 8) + dict[headerOptionsOffset + 3];
- const int headerEnd = headerOptionsOffset + 4 + headerSize;
- int index = headerOptionsOffset + 4;
- while (index < headerEnd) {
- int keyIndex = 0;
- int codePoint = getCodePointAndForwardPointer(dict, &index);
- while (codePoint != NOT_A_CODE_POINT) {
- if (codePoint != key[keyIndex++]) {
- break;
- }
- codePoint = getCodePointAndForwardPointer(dict, &index);
- }
- if (codePoint == NOT_A_CODE_POINT && key[keyIndex] == 0) {
- // We found the key! Copy and return the value.
- codePoint = getCodePointAndForwardPointer(dict, &index);
- while (codePoint != NOT_A_CODE_POINT && outValueIndex < outValueSize) {
- outValue[outValueIndex++] = codePoint;
- codePoint = getCodePointAndForwardPointer(dict, &index);
- }
- // Finished copying. Break to go to the termination code.
- break;
- }
- // We didn't find the key, skip the remainder of it and its value
- while (codePoint != NOT_A_CODE_POINT) {
- codePoint = getCodePointAndForwardPointer(dict, &index);
- }
- codePoint = getCodePointAndForwardPointer(dict, &index);
- while (codePoint != NOT_A_CODE_POINT) {
- codePoint = getCodePointAndForwardPointer(dict, &index);
- }
- }
- // We couldn't find it - fall through and return an empty value.
- }
- // Put a terminator 0 if possible at all (always unless outValueSize is <= 0)
- if (outValueIndex >= outValueSize) outValueIndex = outValueSize - 1;
- if (outValueIndex >= 0) outValue[outValueIndex] = 0;
-}
-
-inline int BinaryFormat::readHeaderValueInt(const uint8_t *const dict, const int dictSize,
- const char *const key) {
- const int bufferSize = LARGEST_INT_DIGIT_COUNT;
- int intBuffer[bufferSize];
- char charBuffer[bufferSize];
- BinaryFormat::readHeaderValue(dict, dictSize, key, intBuffer, bufferSize);
- for (int i = 0; i < bufferSize; ++i) {
- charBuffer[i] = intBuffer[i];
- }
- // If not a number, return S_INT_MIN
- if (!isdigit(charBuffer[0])) return S_INT_MIN;
- return atoi(charBuffer);
-}
-
AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
int *pos) {
const int msb = dict[(*pos)++];
@@ -265,18 +121,6 @@ AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *
return ((msb & 0x7F) << 8) | dict[(*pos)++];
}
-inline float BinaryFormat::getMultiWordCostMultiplier(const uint8_t *const dict,
- const int dictSize) {
- const int headerValue = readHeaderValueInt(dict, dictSize, "MULTIPLE_WORDS_DEMOTION_RATE");
- if (headerValue == S_INT_MIN) {
- return 1.0f;
- }
- if (headerValue <= 0) {
- return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
- }
- return 100.0f / static_cast<float>(headerValue);
-}
-
inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) {
return dict[(*pos)++];
}
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
index 832b74725..d3321f624 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h
+++ b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
@@ -116,8 +116,8 @@ class ByteArrayUtils {
* Reads code points until the terminator is found.
*/
// Returns the length of the string.
- static int readStringAndAdvancePosition(const uint8_t *const buffer, int *const pos,
- int *const outBuffer, const int maxLength) {
+ static int readStringAndAdvancePosition(const uint8_t *const buffer,
+ const int maxLength, int *const outBuffer, int *const pos) {
int length = 0;
int codePoint = readCodePointAndAdvancePosition(buffer, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
@@ -129,7 +129,7 @@ class ByteArrayUtils {
// Advances the position and returns the length of the string.
static int advancePositionToBehindString(
- const uint8_t *const buffer, int *const pos, const int maxLength) {
+ const uint8_t *const buffer, const int maxLength, int *const pos) {
int length = 0;
int codePoint = readCodePointAndAdvancePosition(buffer, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
@@ -138,6 +138,39 @@ class ByteArrayUtils {
return length;
}
+ // Compares the string starting at *pos in buffer with the NUL-terminated charArray. Returns an
+ // integer less than, equal to, or greater than zero when it is less than, matches, or is greater.
+ static AK_FORCE_INLINE int compareStringInBufferWithCharArray(const uint8_t *const buffer,
+ const char *const charArray, const int maxLength, int *const pos) { // *pos advances as code points are read
+ int index = 0; // current position in charArray
+ int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ const uint8_t *const uint8CharArrayForComparison =
+ reinterpret_cast<const uint8_t *>(charArray); // compare chars as unsigned byte values
+ while (NOT_A_CODE_POINT != codePoint
+ && '\0' != uint8CharArrayForComparison[index] && index < maxLength) { // stop at either terminator or at maxLength
+ if (codePoint != uint8CharArrayForComparison[index]) {
+ // A different character is found.
+ // Skip the rest of the string in the buffer before reporting the difference.
+ advancePositionToBehindString(buffer, maxLength - index, pos);
+ return codePoint - uint8CharArrayForComparison[index];
+ }
+ // Advance to the next code point / char pair.
+ codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ ++index;
+ }
+ if (NOT_A_CODE_POINT != codePoint && index < maxLength) {
+ // Skip the rest of the string in the buffer (not done once index has reached maxLength).
+ advancePositionToBehindString(buffer, maxLength - index, pos);
+ }
+ if (NOT_A_CODE_POINT == codePoint && '\0' == uint8CharArrayForComparison[index]) {
+ // Both strings ended at the same position, so the string in the buffer is considered to
+ // match the given char array.
+ return 0;
+ } else {
+ return codePoint - uint8CharArrayForComparison[index]; // one string ended first, or maxLength was reached
+ }
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 2d4ad5df5..561e22d2d 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -33,11 +33,10 @@
namespace latinime {
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust)
- : mBinaryDicitonaryInfo(static_cast<const uint8_t *>(dict), dictSize),
+ : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize), // dict is handed over as const uint8_t *
mDictSize(dictSize),
- mDictFlags(BinaryFormat::getFlags(mBinaryDicitonaryInfo.getDictBuf(), dictSize)),
mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
- mBigramDictionary(new BigramDictionary(&mBinaryDicitonaryInfo)),
+ mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), // NOTE(review): raw new; matching delete is presumably in ~Dictionary() - confirm
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
}
@@ -85,7 +84,7 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
}
int Dictionary::getProbability(const int *word, int length) const {
- const uint8_t *const root = mBinaryDicitonaryInfo.getDictRoot();
+ const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot();
int pos = BinaryFormat::getTerminalPosition(root, word, length,
false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == pos) {
@@ -112,8 +111,4 @@ bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2,
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
}
-int Dictionary::getDictFlags() const {
- return mDictFlags;
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 1f25080b1..151f26183 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -66,22 +66,20 @@ class Dictionary {
int getProbability(const int *word, int length) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const {
- return &mBinaryDicitonaryInfo;
+ return &mBinaryDictionaryInfo;
}
int getDictSize() const { return mDictSize; }
int getMmapFd() const { return mMmapFd; }
int getDictBufAdjust() const { return mDictBufAdjust; }
- int getDictFlags() const;
virtual ~Dictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
- const BinaryDictionaryInfo mBinaryDicitonaryInfo;
+ const BinaryDictionaryInfo mBinaryDictionaryInfo;
// Used only for the mmap version of dictionary loading, but we use these as dummy variables
// also for the malloc version.
const int mDictSize;
- const int mDictFlags;
const int mMmapFd;
const int mDictBufAdjust;
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index f53e56ef1..af378b1b7 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -16,8 +16,10 @@
#include "suggest/core/dictionary/digraph_utils.h"
+#include <cstdlib>
+
#include "defines.h"
-#include "suggest/core/dictionary/binary_format.h"
+#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -33,8 +35,8 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
{ DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
- const int dictFlags, const int compositeGlyphCodePoint) {
- const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
+ const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) {
+ const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header);
if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
return true;
}
@@ -43,24 +45,16 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
// Returns the digraph type associated with the given dictionary.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
- const int dictFlags) {
- if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) {
+ const BinaryDictionaryHeader *const header) { // NOTE(review): header is dereferenced below without a null check
+ if (header->requiresGermanUmlautProcessing()) { // tested first, so it takes precedence over French ligatures
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
- if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) {
+ if (header->requiresFrenchLigatureProcessing()) { // only reached when German umlaut processing is not required
return DIGRAPH_TYPE_FRENCH_LIGATURES;
}
return DIGRAPH_TYPE_NONE;
}
-// Retrieves the set of all digraphs associated with the given dictionary flags.
-// Returns the size of the digraph array, or 0 if none exist.
-/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
- const int dictFlags, const DigraphUtils::digraph_t **const digraphs) {
- const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
- return getAllDigraphsForDigraphTypeAndReturnSize(digraphType, digraphs);
-}
-
// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
@@ -124,7 +118,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
const DigraphUtils::digraph_t *digraphs = 0;
const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
const int digraphsSize =
- DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
+ DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
for (int i = 0; i < digraphsSize; i++) {
if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) {
return &digraphs[i];
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index c1205940c..9d74fe3a6 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -21,6 +21,8 @@
namespace latinime {
+class BinaryDictionaryHeader;
+
class DigraphUtils {
public:
typedef enum {
@@ -37,17 +39,14 @@ class DigraphUtils {
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
- static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint);
- static int getAllDigraphsForDictionaryAndReturnSize(
- const int dictFlags, const digraph_t **const digraphs);
- static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint,
- const DigraphCodePointIndex digraphCodePointIndex);
+ static bool hasDigraphForCodePoint(
+ const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint);
static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
const DigraphCodePointIndex digraphCodePointIndex);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
- static DigraphType getDigraphTypeForDictionary(const int dictFlags);
+ static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header);
static int getAllDigraphsForDigraphTypeAndReturnSize(
const DigraphType digraphType, const digraph_t **const digraphs);
static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint);
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index c398caefa..774d6074e 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -19,6 +19,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/dictionary.h"
@@ -28,9 +29,8 @@ namespace latinime {
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
- mMultiWordCostMultiplier = BinaryFormat::getMultiWordCostMultiplier(
- mDictionary->getBinaryDictionaryInfo()->getDictBuf(),
- mDictionary->getDictSize());
+ mMultiWordCostMultiplier = mDictionary->getBinaryDictionaryInfo()
+ ->getHeader()->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
mPrevWordPos = NOT_VALID_WORD;
@@ -63,10 +63,6 @@ const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const
return mDictionary->getBinaryDictionaryInfo();
}
-int DicTraverseSession::getDictFlags() const {
- return mDictionary->getDictFlags();
-}
-
void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) {
mDicNodesCache.reset(nextActiveCacheSize, maxWords);
mMultiBigramMap.clear();
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 630b3b59b..f95a0b23d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -77,7 +77,6 @@ class DicTraverseSession {
// TODO: Remove
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
- int getDictFlags() const;
//--------------------
// getters and setters
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 1f108e400..6c4a6c166 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -19,6 +19,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/shortcut_utils.h"
@@ -294,7 +295,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
processDicNodeAsMatch(traverseSession, childDicNode);
continue;
}
- if (DigraphUtils::hasDigraphForCodePoint(traverseSession->getDictFlags(),
+ if (DigraphUtils::hasDigraphForCodePoint(
+ traverseSession->getBinaryDictionaryInfo()->getHeader(),
childDicNode->getNodeCodePoint())) {
correctionDicNode.initByCopy(childDicNode);
correctionDicNode.advanceDigraphIndex();