diff options
author | 2012-01-16 15:15:53 +0900 | |
---|---|---|
committer | 2012-01-16 15:59:33 +0900 | |
commit | 4c0eca6e416485be61d7fddcad1e1552444daf85 (patch) | |
tree | 0719be14e726261f834652a7c353dfaeb9db995f | |
parent | 6d4198107bb0bcc383d7324fa39098351b5eceda (diff) | |
download | latinime-4c0eca6e416485be61d7fddcad1e1552444daf85.tar.gz latinime-4c0eca6e416485be61d7fddcad1e1552444daf85.tar.xz latinime-4c0eca6e416485be61d7fddcad1e1552444daf85.zip |
Read multi-byte char group counts
Change-Id: Idc62382f1c814e9bd1466c9f7dda1fcc8ba4137d
-rw-r--r-- | native/src/binary_format.h | 4 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 5 |
2 files changed, 6 insertions, 3 deletions
```diff
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 9944fa2bd..1d74998f6 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -61,7 +61,9 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
 }

 inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
-    return dict[(*pos)++];
+    const int msb = dict[(*pos)++];
+    if (msb < 0x80) return msb;
+    return ((msb & 0x7F) << 8) | dict[(*pos)++];
 }

 inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index cd73fe3f8..ca7f0be0c 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -507,9 +507,10 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
     int maxFreq = -1;

     const uint8_t* const root = DICT_ROOT;
-    mStackChildCount[0] = root[0];
+    int startPos = 0;
+    mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
     mStackInputIndex[0] = 0;
-    mStackSiblingPos[0] = 1;
+    mStackSiblingPos[0] = startPos;
     while (depth >= 0) {
         const int charGroupCount = mStackChildCount[depth];
         int pos = mStackSiblingPos[depth];
```