diff options
Diffstat (limited to 'native/jni/src')
25 files changed, 455 insertions, 444 deletions
diff --git a/native/jni/src/additional_proximity_chars.cpp b/native/jni/src/additional_proximity_chars.cpp index 224f020f2..de8764678 100644 --- a/native/jni/src/additional_proximity_chars.cpp +++ b/native/jni/src/additional_proximity_chars.cpp @@ -38,4 +38,4 @@ const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SI const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] = { 'a', 'e', 'i', 'o' }; -} +} // namespace latinime diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h index 82c31f860..ba76cfced 100644 --- a/native/jni/src/additional_proximity_chars.h +++ b/native/jni/src/additional_proximity_chars.h @@ -45,7 +45,7 @@ class AdditionalProximityChars { } public: - static int getAdditionalCharsSize(const std::string* locale_str, const int32_t c) { + static int getAdditionalCharsSize(const std::string *locale_str, const int32_t c) { if (!isEnLocale(locale_str)) { return 0; } @@ -65,7 +65,7 @@ class AdditionalProximityChars { } } - static const int32_t* getAdditionalChars(const std::string *locale_str, const int32_t c) { + static const int32_t *getAdditionalChars(const std::string *locale_str, const int32_t c) { if (!isEnLocale(locale_str)) { return 0; } @@ -89,7 +89,5 @@ class AdditionalProximityChars { return getAdditionalCharsSize(locale_str, c) > 0; } }; - -} - +} // namespace latinime #endif // LATINIME_ADDITIONAL_PROXIMITY_CHARS_H diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp index 31f1e18a8..c91e5f741 100644 --- a/native/jni/src/basechars.cpp +++ b/native/jni/src/basechars.cpp @@ -18,7 +18,7 @@ namespace latinime { -/** +/* * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c * if c is not a combined character, or the base character if it @@ -187,8 +187,6 @@ const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, }; - // generated with: // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' - } // namespace latinime diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 8057e410a..220171127 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -1,21 +1,20 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#include <string.h> + * Copyright (C) 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstring> #define LOG_TAG "LatinIME: bigram_dictionary.cpp" @@ -103,7 +102,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name - const uint8_t* const root = DICT; + const uint8_t *const root = DICT; int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams @@ -127,7 +126,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // codesSize == 0 means we are trying to find bigram predictions. if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) { - const int bigramFreqTemp = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; // Due to space constraints, the frequency for bigrams is approximate - the lower the // unigram frequency, the worse the precision. The theoritical maximum error in // resulting frequency is 8 - although in the practice it's never bigger than 3 or 4 @@ -140,7 +139,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in ++bigramCount; } } - } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); + } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); return bigramCount; } @@ -149,14 +148,14 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return 0; - const uint8_t* const root = DICT; + const uint8_t *const root = DICT; int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_VALID_WORD == pos) return 0; const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - if (0 == (flags & UnigramDictionary::FLAG_HAS_BIGRAMS)) return 0; - if (0 == (flags & UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS)) { + if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; + if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { BinaryFormat::getCharCodeAndForwardPointer(root, &pos); } else { pos = BinaryFormat::skipOtherCharacters(root, pos); @@ -170,7 +169,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const { memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE); - const uint8_t* const root = DICT; + const uint8_t *const root = DICT; int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (0 == pos) { @@ -183,12 +182,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p int bigramFlags; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); (*map)[bigramPos] = frequency; setInFilter(filter, bigramPos); - } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); + } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const { @@ -209,7 +208,7 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const { - const uint8_t* const root = DICT; + const uint8_t *const root = DICT; int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; @@ -224,7 +223,7 @@ bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const in if (bigramPos == nextWordPos) { return true; } - } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); + } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); return false; } diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 0b3577ad8..d676cca63 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -24,7 +24,6 @@ namespace latinime { -class Dictionary; class BigramDictionary { public: BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); @@ -53,7 +52,5 @@ class BigramDictionary { // TODO: Re-implement proximity correction for bigram correction static const int MAX_ALTERNATIVES = 1; }; - } // namespace latinime - #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 474c854fe..2ee4077c1 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -18,13 +18,47 @@ #define LATINIME_BINARY_FORMAT_H #include <limits> +#include <map> #include "bloom_filter.h" #include "char_utils.h" -#include "unigram_dictionary.h" namespace latinime { class BinaryFormat { + public: + // Mask and flags for children address type selection. + static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; + static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; + static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; + static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; + static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; + + // Flag for single/multiple char group + static const int FLAG_HAS_MULTIPLE_CHARS = 0x20; + + // Flag for terminal groups + static const int FLAG_IS_TERMINAL = 0x10; + + // Flag for shortcut targets presence + static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; + // Flag for bigram presence + static const int FLAG_HAS_BIGRAMS = 0x04; + + // Attribute (bigram/shortcut) related flags: + // Flag for presence of more attributes + static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; + // Flag for sign of offset. If this flag is set, the offset value must be negated. + static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; + + // Mask for attribute frequency, stored on 4 bits inside the flags byte. + static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F; + + // Mask and flags for attribute address type selection. + static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; @@ -46,29 +80,29 @@ class BinaryFormat { const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; const static int SHORTCUT_LIST_SIZE_SIZE = 2; - static int detectFormat(const uint8_t* const dict); - static unsigned int getHeaderSize(const uint8_t* const dict); - static unsigned int getFlags(const uint8_t* const dict); - static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos); - static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos); - static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos); - static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos); - static int skipOtherCharacters(const uint8_t* const dict, const int pos); + static int detectFormat(const uint8_t *const dict); + static unsigned int getHeaderSize(const uint8_t *const dict); + static unsigned int getFlags(const uint8_t *const dict); + static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); + static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); + static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); + static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); + static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipFrequency(const uint8_t flags, const int pos); - static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags, + static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); - static int readChildrenPosition(const uint8_t* const dict, const uint8_t flags, const int pos); + static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); static bool hasChildrenInFlags(const uint8_t flags); - static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags, + static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); - static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord, + static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, const int length, const bool forceLowerCaseSearch); - static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, - uint16_t* outWord, int* outUnigramFrequency); + static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, + uint16_t *outWord, int *outUnigramFrequency); static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -83,7 +117,7 @@ class BinaryFormat { const static unsigned int NO_FLAGS = 0; }; -inline int BinaryFormat::detectFormat(const uint8_t* const dict) { +inline int BinaryFormat::detectFormat(const uint8_t *const dict) { // The magic number is stored big-endian. const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3]; switch (magicNumber) { @@ -105,7 +139,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) { } } -inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) { +inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) { switch (detectFormat(dict)) { case 1: return NO_FLAGS; @@ -114,7 +148,7 @@ inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) { } } -inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) { +inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) { switch (detectFormat(dict)) { case 1: return FORMAT_VERSION_1_HEADER_SIZE; @@ -126,17 +160,17 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) { } } -inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) { +inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) { const int msb = dict[(*pos)++]; if (msb < 0x80) return msb; return ((msb & 0x7F) << 8) | dict[(*pos)++]; } -inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) { +inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) { return dict[(*pos)++]; } -inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos) { +inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; const int32_t character = dict[origin]; if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { @@ -155,12 +189,12 @@ inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const d } } -inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t* const dict, +inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos) { return dict[pos]; } -inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const int pos) { +inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { int currentPos = pos; int32_t character = dict[currentPos++]; while (CHARACTER_ARRAY_TERMINATOR != character) { @@ -174,22 +208,22 @@ inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const in static inline int attributeAddressSize(const uint8_t flags) { static const int ATTRIBUTE_ADDRESS_SHIFT = 4; - return (flags & UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; + return (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; /* Note: this is a value-dependant optimization of what may probably be more readably written this way: - switch (flags * UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) { - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; + switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; default: return 0; } */ } -static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) { +static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) { int currentPos = pos; uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, ¤tPos); - while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) { + while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) { currentPos += attributeAddressSize(flags); flags = BinaryFormat::getFlagsAndForwardPointer(dict, ¤tPos); } @@ -199,11 +233,11 @@ static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) static inline int childrenAddressSize(const uint8_t flags) { static const int CHILDREN_ADDRESS_SHIFT = 6; - return (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT; + return (BinaryFormat::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT; /* See the note in attributeAddressSize. The same applies here */ } -static inline int shortcutByteSize(const uint8_t* const dict, const int pos) { +static inline int shortcutByteSize(const uint8_t *const dict, const int pos) { return ((int)(dict[pos] << 8)) + (dict[pos + 1]); } @@ -212,28 +246,28 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos } inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { - return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos; + return FLAG_IS_TERMINAL & flags ? pos + 1 : pos; } -inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos) { - if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { + if (FLAG_HAS_SHORTCUT_TARGETS & flags) { return pos + shortcutByteSize(dict, pos); } else { return pos; } } -inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos) { - if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { + if (FLAG_HAS_BIGRAMS & flags) { return skipExistingBigrams(dict, pos); } else { return pos; } } -inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. int newPos = pos; @@ -242,7 +276,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint return newPos; } -inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, +inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { int currentPos = pos; currentPos = skipChildrenPosition(flags, currentPos); @@ -250,18 +284,18 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, return currentPos; } -inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos) { int offset = 0; - switch (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) { - case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + switch (MASK_GROUP_ADDRESS_TYPE & flags) { + case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: offset = dict[pos]; break; - case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: offset = dict[pos] << 8; offset += dict[pos + 1]; break; - case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: offset = dict[pos] << 16; offset += dict[pos + 1] << 8; offset += dict[pos + 2]; @@ -275,32 +309,31 @@ inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const u } inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) { - return (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS - != (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)); + return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags)); } -inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* const dict, +inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos) { int offset = 0; const int origin = *pos; - switch (UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: + switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { + case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: offset = dict[origin]; *pos = origin + 1; break; - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: + case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: offset = dict[origin] << 8; offset += dict[origin + 1]; *pos = origin + 2; break; - case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: + case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: offset = dict[origin] << 16; offset += dict[origin + 1] << 8; offset += dict[origin + 2]; *pos = origin + 3; break; } - if (UnigramDictionary::FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) { + if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) { return origin - offset; } else { return origin + offset; @@ -309,8 +342,8 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. -inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, - const int32_t* const inWord, const int length, const bool forceLowerCaseSearch) { +inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, + const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) { int pos = 0; int wordPos = 0; @@ -332,7 +365,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, // char within a node, so either we found our match in this node, or there is // no match and we can return NOT_VALID_WORD. So we will check all the characters // in this character group indeed does match. - if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) { + if (FLAG_HAS_MULTIPLE_CHARS & flags) { character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); while (NOT_A_CHARACTER != character) { ++wordPos; @@ -350,14 +383,13 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, // If we don't match the length AND don't have children, then a word in the // dictionary fully matches a prefix of the searched word but not the full word. ++wordPos; - if (UnigramDictionary::FLAG_IS_TERMINAL & flags) { + if (FLAG_IS_TERMINAL & flags) { if (wordPos == length) { return charGroupPos; } - pos = BinaryFormat::skipFrequency(UnigramDictionary::FLAG_IS_TERMINAL, pos); + pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos); } - if (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS - == (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)) { + if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) { return NOT_VALID_WORD; } // We have children and we are still shorter than the word we are searching for, so @@ -367,7 +399,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, break; } else { // This chargroup does not match, so skip the remaining part and go to the next. - if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) { + if (FLAG_HAS_MULTIPLE_CHARS & flags) { pos = BinaryFormat::skipOtherCharacters(root, pos); } pos = BinaryFormat::skipFrequency(flags, pos); @@ -396,8 +428,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, * outUnigramFrequency: a pointer to an int to write the frequency into. * Return value : the length of the word, of 0 if the word was not found. */ -inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address, - const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) { +inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, + const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) { int pos = 0; int wordPos = 0; @@ -420,7 +452,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; - if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) { + if (FLAG_HAS_MULTIPLE_CHARS & flags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug @@ -435,7 +467,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a } // We need to skip past this char group, so skip any remaining chars after the // first and possibly the frequency. - if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) { + if (FLAG_HAS_MULTIPLE_CHARS & flags) { pos = skipOtherCharacters(root, pos); } pos = skipFrequency(flags, pos); @@ -443,8 +475,8 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a // The fact that this group has children is very important. Since we already know // that this group does not match, if it has no children we know it is irrelevant // to what we are searching for. - const bool hasChildren = (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != - (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)); + const bool hasChildren = (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != + (MASK_GROUP_ADDRESS_TYPE & flags)); // We will write in `found' whether we have passed the children address we are // searching for. For example if we search for "beer", the children of b are less // than the address we are searching for and the children of c are greater. When we @@ -484,7 +516,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; - if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & lastFlags) { + if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; @@ -540,8 +572,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. - const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); - return (int)(unigramFreq + (bigramFreq + 1) * stepSize); + const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); + return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize); } // This returns a probability in log space. @@ -557,7 +589,5 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int, return backoff(unigramFreq); } } - } // namespace latinime - #endif // LATINIME_BINARY_FORMAT_H diff --git a/native/jni/src/bloom_filter.h b/native/jni/src/bloom_filter.h index 7ae6a1fa4..47177dcba 100644 --- a/native/jni/src/bloom_filter.h +++ b/native/jni/src/bloom_filter.h @@ -32,7 +32,5 @@ static inline bool isInFilter(const uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; return filter[bucket >> 3] & (1 << (bucket & 0x7)); } - } // namespace latinime - #endif // LATINIME_BLOOM_FILTER_H diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index a31a0632c..45d49b087 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -14,7 +14,9 @@ * limitations under the License. */ -#include <stdlib.h> +#include <cstdlib> + +#include "char_utils.h" namespace latinime { @@ -895,5 +897,4 @@ unsigned short latin_tolower(unsigned short c) { compare_pair_capital); return p ? p->small : c; } - } // namespace latinime diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index 21dca9a0a..edd96bbb0 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -62,7 +62,5 @@ inline static unsigned short toLowerCase(const unsigned short c) { inline static unsigned short toBaseLowerCase(const unsigned short c) { return toLowerCase(toBaseChar(c)); } - } // namespace latinime - #endif // LATINIME_CHAR_UTILS_H diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 827067b9f..ea4bddae2 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -14,23 +14,22 @@ * limitations under the License. */ -#include <assert.h> -#include <ctype.h> -#include <math.h> -#include <stdio.h> -#include <string.h> +#include <cassert> +#include <cctype> +#include <cmath> +#include <cstring> #define LOG_TAG "LatinIME: correction.cpp" #include "char_utils.h" #include "correction.h" #include "defines.h" -#include "dictionary.h" -#include "proximity_info.h" #include "proximity_info_state.h" namespace latinime { +class ProximityInfo; + ///////////////////////////// // edit distance funcitons // ///////////////////////////// @@ -95,11 +94,11 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD ////////////////////// // inline functions // ////////////////////// -static const char QUOTE = '\''; +static const char SINGLE_QUOTE = '\''; -inline bool Correction::isQuote(const unsigned short c) { +inline bool Correction::isSingleQuote(const unsigned short c) { const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); - return (c == QUOTE && userTypedChar != QUOTE); + return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE); } //////////////// @@ -326,7 +325,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = NOT_A_DISTANCE; // Skip checking this node - if (mNeedsToTraverseAllNodes || isQuote(c)) { + if (mNeedsToTraverseAllNodes || isSingleQuote(c)) { bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold @@ -344,7 +343,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( mInputIndex, proximityIndex); } - if (!isQuote(c)) { + if (!isSingleQuote(c)) { incrementInputIndex(); incremented = true; } @@ -633,7 +632,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( Correction::~Correction() { } -inline static int getQuoteCount(const unsigned short* word, const int length) { +inline static int getQuoteCount(const unsigned short *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { if(word[i] == '\'') { @@ -653,7 +652,7 @@ inline static bool isUpperCase(unsigned short c) { /* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, - const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction, + const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; @@ -677,7 +676,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // TODO: use mExcessiveCount const int matchCount = inputLength - correction->mProximityCount - excessiveCount; - const unsigned short* word = correction->mWord; + const unsigned short *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) @@ -791,7 +790,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex static const float MIN = 0.3f; static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; static const float R2 = HALF_SCORE_SQUARED_RADIUS; - const float x = (float)squaredDistance + const float x = static_cast<float>(squaredDistance) / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; const float factor = max((x < R1) ? (A * (R1 - x) + B * x) / R1 @@ -916,7 +915,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex /* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, - const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { + const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; @@ -1046,10 +1045,10 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( /* Damerau-Levenshtein distance */ inline static int editDistanceInternal( - int* editDistanceTable, const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength) { + int *editDistanceTable, const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; + int *dp = editDistanceTable; const int li = beforeLength + 1; const int lo = afterLength + 1; for (int i = 0; i < li; ++i) { @@ -1085,8 +1084,8 @@ inline static int editDistanceInternal( return dp[li * lo - 1]; } -int Correction::RankingAlgorithm::editDistance(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength) { +int Correction::RankingAlgorithm::editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); } @@ -1114,8 +1113,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before, // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. /* static */ -float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength, +float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength, const int score) { if (0 == beforeLength || 0 == afterLength) { return 0; @@ -1133,13 +1132,14 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be } const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE - * pow((float)TYPED_LETTER_MULTIPLIER, - (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; + * pow(static_cast<float>(TYPED_LETTER_MULTIPLIER), + static_cast<float>(min(beforeLength, afterLength - spaceCount))) + * FULL_WORD_MULTIPLIER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, // so, 0 <= distance / afterLength <= 1 - const float weight = 1.0 - (float) distance / afterLength; + const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); return (score / maxScore) * weight; } } // namespace latinime diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index ae7b3a5f8..81623a46b 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -17,7 +17,7 @@ #ifndef LATINIME_CORRECTION_H #define LATINIME_CORRECTION_H -#include <assert.h> +#include <cassert> #include <stdint.h> #include "correction_state.h" @@ -138,9 +138,9 @@ class Correction { int getFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word); - int getFinalProbability(const int probability, unsigned short **word, int* wordLength); + int getFinalProbability(const int probability, unsigned short **word, int *wordLength); int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int* wordLength, const int inputLength); + int *wordLength, const int inputLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); @@ -163,15 +163,15 @@ class Correction { class RankingAlgorithm { public: static int calculateFinalProbability(const int inputIndex, const int depth, - const int probability, int *editDistanceTable, const Correction* correction, + const int probability, int *editDistanceTable, const Correction *correction, const int inputLength); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const Correction* correction, const bool isSpaceProximity, + const int wordCount, const Correction *correction, const bool isSpaceProximity, const unsigned short *word); - static float calcNormalizedScore(const unsigned short* before, const int beforeLength, - const unsigned short* after, const int afterLength, const int score); - static int editDistance(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength); + static float calcNormalizedScore(const unsigned short *before, const int beforeLength, + const unsigned short *after, const int afterLength, const int score); + static int editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength); private: static const int CODE_SPACE = ' '; static const int MAX_INITIAL_SCORE = 255; @@ -184,7 +184,7 @@ class Correction { proximityInfo, inputCodes, inputLength, xCoordinates, yCoordinates); } - const unsigned short* getPrimaryInputWord() const { + const unsigned short *getPrimaryInputWord() const { return mProximityInfoState.getPrimaryInputWord(); } @@ -197,13 +197,13 @@ class Correction { inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); - inline bool isQuote(const unsigned short c); + inline bool isSingleQuote(const unsigned short c); inline CorrectionType processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int* wordLength, const int inputLength); + int *wordLength, const int inputLength); static const int TYPED_LETTER_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2; diff --git a/native/jni/src/correction_state.h b/native/jni/src/correction_state.h index 5b2cbd3a2..a63d4aa94 100644 --- a/native/jni/src/correction_state.h +++ b/native/jni/src/correction_state.h @@ -79,6 +79,5 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos state->mSkipping = false; state->mAdditionalProximityMatching = false; } - } // namespace latinime #endif // LATINIME_CORRECTION_STATE_H diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h index 376ba59d9..2168d6672 100644 --- a/native/jni/src/debug.h +++ b/native/jni/src/debug.h @@ -1,26 +1,25 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * Copyright (C) 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef LATINIME_DEBUG_H #define LATINIME_DEBUG_H #include "defines.h" -static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output, +static inline unsigned char *convertToUnibyteString(unsigned short *input, unsigned char *output, const unsigned int length) { unsigned int i = 0; for (; i <= length && input[i] != 0; ++i) @@ -29,8 +28,8 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig return output; } -static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input, - unsigned char* output, const unsigned int length, unsigned char c) { +static inline unsigned char *convertToUnibyteStringAndReplaceLastChar(unsigned short *input, + unsigned char *output, const unsigned int length, unsigned char c) { unsigned int i = 0; for (; i <= length && input[i] != 0; ++i) output[i] = input[i] & 0xFF; @@ -39,7 +38,7 @@ static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned s return output; } -static inline void LOGI_S16(unsigned short* string, const unsigned int length) { +static inline void LOGI_S16(unsigned short *string, const unsigned int length) { unsigned char tmp_buffer[length]; convertToUnibyteString(string, tmp_buffer, length); AKLOGI(">> %s", tmp_buffer); @@ -49,7 +48,7 @@ static inline void LOGI_S16(unsigned short* string, const unsigned int length) { // usleep(10); } -static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int length, +static inline void LOGI_S16_PLUS(unsigned short *string, const unsigned int length, unsigned char c) { unsigned char tmp_buffer[length+1]; convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c); @@ -58,7 +57,7 @@ static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int leng // usleep(10); } -static inline void printDebug(const char* tag, int* codes, int codesSize, int MAX_PROXIMITY_CHARS) { +static inline void printDebug(const char *tag, int *codes, int codesSize, int MAX_PROXIMITY_CHARS) { unsigned char *buf = (unsigned char*)malloc((1 + codesSize) * sizeof(*buf)); buf[codesSize] = 0; @@ -68,5 +67,4 @@ static inline void printDebug(const char* tag, int* codes, int codesSize, int MA free(buf); } - #endif // LATINIME_DEBUG_H diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index c7d3bf313..31dd61e30 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -1,42 +1,79 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * Copyright (C) 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef LATINIME_DEFINES_H #define LATINIME_DEFINES_H #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#include <cutils/log.h> -#define AKLOGE ALOGE -#define AKLOGI ALOGI +#include <android/log.h> +#ifndef LOG_TAG +#define LOG_TAG "LatinIME: " +#endif +#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) +#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) +#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ + dumpResult(words, frequencies, maxWordCount, maxWordLength); } while(0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) #define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0) -static inline void dumpWord(const unsigned short* word, const int length) { +static inline void dumpWordInfo(const unsigned short *word, const int length, + const int rank, const int frequency) { static char charBuf[50]; + int i = 0; + for (; i < length; ++i) { + const unsigned short c = word[i]; + if (c == 0) { + break; + } + charBuf[i] = c; + } + charBuf[i] = 0; + if (i > 1) { + AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency); + } +} - for (int i = 0; i < length; ++i) { - charBuf[i] = word[i]; +static inline void dumpResult( + const unsigned short *outWords, const int *frequencies, const int maxWordCounts, + const int maxWordLength) { + AKLOGI("--- DUMP RESULT ---------"); + for (int i = 0; i < maxWordCounts; ++i) { + dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]); } - charBuf[length] = 0; - AKLOGI("[ %s ]", charBuf); + AKLOGI("-------------------------"); } -static inline void dumpWordInt(const int* word, const int length) { +static inline void dumpWord(const unsigned short *word, const int length) { + static char charBuf[50]; + int i = 0; + for (; i < length; ++i) { + const unsigned short c = word[i]; + if (c == 0) { + break; + } + charBuf[i] = c; + } + charBuf[i] = 0; + if (i > 1) { + AKLOGI("[ %s ]", charBuf); + } +} + +static inline void dumpWordInt(const int *word, const int length) { static char charBuf[50]; for (int i = 0; i < length; ++i) { @@ -49,6 +86,7 @@ static inline void dumpWordInt(const int* word, const int length) { #else #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) +#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_WORD(word, length) #define DUMP_WORD_INT(word, length) #endif @@ -86,17 +124,18 @@ static inline void prof_out(void) { AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); } AKLOGI("Total time is %6.3f ms.", - profile_buf[PROF_BUF_SIZE - 1] * 1000 / (float)CLOCKS_PER_SEC); + profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); float all = 0; for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { all += profile_buf[i]; } if (all == 0) all = 1; for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - if (profile_buf[i] != 0) { + if (profile_buf[i]) { AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", i, (profile_buf[i] * 100 / all), - profile_buf[i] * 1000 / (float)CLOCKS_PER_SEC, profile_counter[i]); + profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), + profile_counter[i]); } } } @@ -116,10 +155,6 @@ static inline void prof_out(void) { #endif // FLAG_DO_PROFILE #ifdef FLAG_DBG -#include <cutils/log.h> -#ifndef LOG_TAG -#define LOG_TAG "LatinIME: " -#endif #define DEBUG_DICT true #define DEBUG_DICT_FULL false #define DEBUG_EDIT_DISTANCE false @@ -146,7 +181,6 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false - #endif // FLAG_DBG #ifndef U_SHORT_MAX @@ -313,5 +347,4 @@ typedef enum { // Additional proximity char which can differ by language. ADDITIONAL_PROXIMITY_CHAR } ProximityType; - #endif // LATINIME_DEFINES_H diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index f3166e75a..ee55cfa60 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -1,28 +1,29 @@ /* -** -** Copyright 2009, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#include <stdio.h> + * Copyright (C) 2009, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #define LOG_TAG "LatinIME: dictionary.cpp" +#include <stdint.h> + +#include "bigram_dictionary.h" #include "binary_format.h" #include "defines.h" #include "dictionary.h" #include "gesture_decoder_wrapper.h" +#include "unigram_dictionary.h" namespace latinime { @@ -55,6 +56,40 @@ Dictionary::~Dictionary() { delete mGestureDecoder; } +int Dictionary::getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, + int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, + int prevWordLength, int commitPoint, bool isGesture, + bool useFullEditDistance, unsigned short *outWords, + int *frequencies, int *spaceIndices, int *outputTypes) { + int result = 0; + if (isGesture) { + mGestureDecoder->setPrevWord(prevWordChars, prevWordLength); + result = mGestureDecoder->getSuggestions(proximityInfo, xcoordinates, ycoordinates, + times, pointerIds, codes, codesSize, commitPoint, + outWords, frequencies, spaceIndices, outputTypes); + if (DEBUG_DICT) { + DUMP_RESULT(outWords, frequencies, 18 /* MAX_WORDS */, MAX_WORD_LENGTH_INTERNAL); + } + return result; + } else { + std::map<int, int> bigramMap; + uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; + mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, + prevWordLength, &bigramMap, bigramFilter); + result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, + ycoordinates, codes, codesSize, &bigramMap, bigramFilter, + useFullEditDistance, outWords, frequencies, outputTypes); + return result; + } +} + +int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize, + unsigned short *outWords, int *frequencies, int *outputTypes) const { + if (length <= 0) return 0; + return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, + outputTypes); +} + int Dictionary::getFrequency(const int32_t *word, int length) const { return mUnigramDictionary->getFrequency(word, length); } @@ -63,5 +98,4 @@ bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t int length2) const { return mBigramDictionary->isValidBigram(word1, length1, word2, length2); } - } // namespace latinime diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 7911403dc..ab238c824 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -17,18 +17,17 @@ #ifndef LATINIME_DICTIONARY_H #define LATINIME_DICTIONARY_H -#include <map> +#include <stdint.h> -#include "bigram_dictionary.h" -#include "char_utils.h" #include "defines.h" -#include "incremental_decoder_interface.h" -#include "proximity_info.h" -#include "unigram_dictionary.h" -#include "words_priority_queue_pool.h" namespace latinime { +class BigramDictionary; +class IncrementalDecoderInterface; +class ProximityInfo; +class UnigramDictionary; + class Dictionary { public: // Taken from SuggestedWords.java @@ -49,32 +48,10 @@ class Dictionary { int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) { - int result = 0; - if (isGesture) { - mGestureDecoder->setPrevWord(prevWordChars, prevWordLength); - result = mGestureDecoder->getSuggestions(proximityInfo, xcoordinates, ycoordinates, - times, pointerIds, codes, codesSize, commitPoint, - outWords, frequencies, spaceIndices, outputTypes); - return result; - } else { - std::map<int, int> bigramMap; - uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; - mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, - prevWordLength, &bigramMap, bigramFilter); - result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, - ycoordinates, codes, codesSize, &bigramMap, bigramFilter, - useFullEditDistance, outWords, frequencies, outputTypes); - return result; - } - } + int *frequencies, int *spaceIndices, int *outputTypes); int getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const { - if (length <= 0) return 0; - return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, - outputTypes); - } + unsigned short *outWords, int *frequencies, int *outputTypes) const; int getFrequency(const int32_t *word, int length) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; @@ -82,7 +59,7 @@ class Dictionary { int getDictSize() const { return mDictSize; } int getMmapFd() const { return mMmapFd; } int getDictBufAdjust() const { return mDictBufAdjust; } - ~Dictionary(); + virtual ~Dictionary(); // public static utility methods // static inline methods should be defined in the header file @@ -113,5 +90,4 @@ inline int Dictionary::wideStrLen(unsigned short *str) { return end - str; } } // namespace latinime - #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index a4a641160..cee408d46 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -14,18 +14,17 @@ * limitations under the License. */ -#include <assert.h> -#include <math.h> -#include <stdio.h> +#include <cassert> +#include <cmath> +#include <cstring> #include <string> #define LOG_TAG "LatinIME: proximity_info.cpp" #include "additional_proximity_chars.h" +#include "char_utils.h" #include "defines.h" -#include "dictionary.h" #include "proximity_info.h" -#include "proximity_info_state.h" namespace latinime { @@ -110,7 +109,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { if (DEBUG_PROXIMITY_INFO) { AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y); } - int32_t* proximityCharsArray = mProximityCharsArray; + int32_t *proximityCharsArray = mProximityCharsArray; for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { if (DEBUG_PROXIMITY_INFO) { AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]); @@ -171,7 +170,7 @@ void ProximityInfo::calculateNearbyKeyCodes( return; } - const int32_t* additionalProximityChars = + const int32_t *additionalProximityChars = AdditionalProximityChars::getAdditionalChars(&mLocaleStr, primaryKey); for (int j = 0; j < additionalProximitySize; ++j) { const int32_t ac = additionalProximityChars[j]; @@ -216,7 +215,7 @@ int ProximityInfo::getKeyIndex(const int c) const { void ProximityInfo::getCenters(int *centerXs, int *centerYs, int *codeToKeyIndex, int *keyToCodeIndex, int *keyCount, int *keyWidth) const { *keyCount = KEY_COUNT; - *keyWidth = sqrt((float)MOST_COMMON_KEY_WIDTH_SQUARE); + *keyWidth = sqrt(static_cast<float>(MOST_COMMON_KEY_WIDTH_SQUARE)); for (int i = 0; i < KEY_COUNT; ++i) { const int code = mKeyCharCodes[i]; diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index d58935c6b..abd07dd3e 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -141,7 +141,5 @@ class ProximityInfo { int mCodeToKeyIndex[MAX_CHAR_CODE + 1]; // TODO: move to correction.h }; - } // namespace latinime - #endif // LATINIME_PROXIMITY_INFO_H diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 149299eb6..86c8a697a 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -14,22 +14,19 @@ * limitations under the License. */ -#include <assert.h> +#include <cstring> // for memset() #include <stdint.h> -#include <string> #define LOG_TAG "LatinIME: proximity_info_state.cpp" -#include "additional_proximity_chars.h" #include "defines.h" -#include "dictionary.h" #include "proximity_info.h" #include "proximity_info_state.h" namespace latinime { void ProximityInfoState::initInputParams( - const ProximityInfo* proximityInfo, const int32_t* inputCodes, const int inputLength, - const int* xCoordinates, const int* yCoordinates) { + const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength, + const int *xCoordinates, const int *yCoordinates) { mProximityInfo = proximityInfo; mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData(); mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare(); @@ -132,8 +129,8 @@ float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const { const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex); const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex); - const float inputX = (float)mInputXCoordinates[inputIndex]; - const float inputY = (float)mInputYCoordinates[inputIndex]; + const float inputX = static_cast<float>(mInputXCoordinates[inputIndex]); + const float inputY = static_cast<float>(mInputYCoordinates[inputIndex]); return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY); } } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 717871c90..76d45516e 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -17,11 +17,9 @@ #ifndef LATINIME_PROXIMITY_INFO_STATE_H #define LATINIME_PROXIMITY_INFO_STATE_H -#include <assert.h> #include <stdint.h> #include <string> -#include "additional_proximity_chars.h" #include "char_utils.h" #include "defines.h" @@ -43,14 +41,14 @@ class ProximityInfoState { // Defined in proximity_info_state.cpp // ///////////////////////////////////////// void initInputParams( - const ProximityInfo* proximityInfo, const int32_t* inputCodes, const int inputLength, - const int* xCoordinates, const int* yCoordinates); + const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength, + const int *xCoordinates, const int *yCoordinates); ///////////////////////////////////////// // Defined here // ///////////////////////////////////////// ProximityInfoState() {}; - inline const int* getProximityCharsAt(const int index) const { + inline const int *getProximityCharsAt(const int index) const { return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); } @@ -154,7 +152,7 @@ class ProximityInfoState { inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex]; } - inline const unsigned short* getPrimaryInputWord() const { + inline const unsigned short *getPrimaryInputWord() const { return mPrimaryInputWord; } @@ -215,7 +213,5 @@ class ProximityInfoState { int mInputLength; unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; }; - } // namespace latinime - #endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index c712f502d..d63364514 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -17,7 +17,7 @@ #ifndef LATINIME_TERMINAL_ATTRIBUTES_H #define LATINIME_TERMINAL_ATTRIBUTES_H -#include "unigram_dictionary.h" +#include "binary_format.h" namespace latinime { @@ -29,14 +29,14 @@ namespace latinime { class TerminalAttributes { public: class ShortcutIterator { - const uint8_t* const mDict; + const uint8_t *const mDict; bool mHasNextShortcutTarget; int mPos; public: - ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict), + ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) : mDict(dict), mPos(pos) { - mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS)); + mHasNextShortcutTarget = (0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)); } inline bool hasNextShortcutTarget() const { @@ -46,10 +46,10 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. - inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { + inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = - 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); + 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos); @@ -63,12 +63,12 @@ class TerminalAttributes { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); - const uint8_t* const mDict; + const uint8_t *const mDict; const uint8_t mFlags; const int mStartPos; public: - TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) : + TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) : mDict(dict), mFlags(flags), mStartPos(pos) { } @@ -79,5 +79,4 @@ class TerminalAttributes { } }; } // namespace latinime - #endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 0ffb3eb63..b6b0210cc 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -1,32 +1,33 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#include <assert.h> -#include <string.h> + * Copyright (C) 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cassert> +#include <cstring> #define LOG_TAG "LatinIME: unigram_dictionary.cpp" +#include "binary_format.h" #include "char_utils.h" #include "defines.h" #include "dictionary.h" -#include "unigram_dictionary.h" - -#include "binary_format.h" +#include "proximity_info.h" #include "terminal_attributes.h" +#include "unigram_dictionary.h" +#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -40,7 +41,7 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[ { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE // TODO: check the header -UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier, +UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags) : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier), @@ -68,7 +69,7 @@ static inline void addWord( // Return the replacement code point for a digraph, or 0 if none. int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int codesSize, - const digraph_t* const digraphs, const unsigned int digraphsSize) const { + const digraph_t *const digraphs, const unsigned int digraphsSize) const { // There can't be a digraph if we don't have at least 2 characters to examine if (i + 2 > codesSize) return false; @@ -103,7 +104,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, - const digraph_t* const digraphs, const unsigned int digraphsSize) const { + const digraph_t *const digraphs, const unsigned int digraphsSize) const { const int startIndex = codesDest - codesBuffer; if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { @@ -222,7 +223,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { - short unsigned int* w = outWords + j * MAX_WORD_LENGTH; + short unsigned int *w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; (void)s; @@ -259,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(4); bool hasAutoCorrectionCandidate = false; - WordsPriorityQueue* masterQueue = queuePool->getMasterQueue(); + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); if (masterQueue->size() > 0) { float nsForMaster = masterQueue->getHighestNormalizedScore( correction->getPrimaryInputWord(), inputLength, 0, 0, 0); @@ -284,11 +285,11 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); + WordsPriorityQueue *queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { - WordsPriorityQueue::SuggestedWord* sw = queue->top(); + WordsPriorityQueue::SuggestedWord *sw = queue->top(); const int score = sw->mScore; - const unsigned short* word = sw->mWord; + const unsigned short *word = sw->mWord; const int wordLength = sw->mWordLength; float ns = Correction::RankingAlgorithm::calcNormalizedScore( correction->getPrimaryInputWord(), i, word, wordLength, score); @@ -383,7 +384,7 @@ inline void UnigramDictionary::onTerminal(const int probability, const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; int wordLength; - unsigned short* wordPointer; + unsigned short *wordPointer; if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); @@ -430,11 +431,11 @@ inline void UnigramDictionary::onTerminal(const int probability, int UnigramDictionary::getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool* queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputLength, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) const { + int*wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_ABORT; } @@ -477,7 +478,7 @@ int UnigramDictionary::getSubStringSuggestion( // TODO: Remove the safety net above // ////////////////////////////////////////////// - unsigned short* tempOutputWord = 0; + unsigned short *tempOutputWord = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, @@ -508,7 +509,7 @@ int UnigramDictionary::getSubStringSuggestion( } } } - WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); + WordsPriorityQueue *queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); // TODO: Return the correct value depending on doAutoCompletion if (!queue || queue->size() <= 0) { return FLAG_MULTIPLE_SUGGEST_ABORT; @@ -579,10 +580,10 @@ int UnigramDictionary::getSubStringSuggestion( void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, - const int outputWordLength, int *freqArray, int* wordLengthArray, - unsigned short* outputWord) const { + const int outputWordLength, int *freqArray, int *wordLengthArray, + unsigned short *outputWord) const { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; @@ -660,7 +661,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate) const { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { @@ -705,10 +706,10 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, - const uint8_t* const root, const int startPos, - const uint16_t* const inWord, const int startInputIndex, - int32_t* outNewWord, int* outInputIndex, int* outPos) { - const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags)); + const uint8_t *const root, const int startPos, + const uint16_t *const inWord, const int startInputIndex, + int32_t *outNewWord, int *outInputIndex, int *outPos) { + const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t baseChar = toBaseLowerCase(character); @@ -743,8 +744,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, // It will compare the frequency to the max frequency, and if greater, will // copy the word into the output buffer. In output value maxFreq, it will // write the new maximum frequency if it changed. -static inline void onTerminalWordLike(const int freq, int32_t* newWord, const int length, - short unsigned int* outWord, int* maxFreq) { +static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, + short unsigned int *outWord, int *maxFreq) { if (freq > *maxFreq) { for (int q = 0; q < length; ++q) outWord[q] = newWord[q]; @@ -755,12 +756,12 @@ static inline void onTerminalWordLike(const int freq, int32_t* newWord, const in // Will find the highest frequency of the words like the one passed as an argument, // that is, everything that only differs by case/accents. -int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWord, - const int length, short unsigned int* outWord) const { +int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, + const int length, short unsigned int *outWord) const { int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int depth = 0; int maxFreq = -1; - const uint8_t* const root = DICT_ROOT; + const uint8_t *const root = DICT_ROOT; int stackChildCount[MAX_WORD_LENGTH_INTERNAL]; int stackInputIndex[MAX_WORD_LENGTH_INTERNAL]; int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; @@ -781,7 +782,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor // into inputIndex if there is a match. const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord, inputIndex, newWord, &inputIndex, &pos); - if (isAlike && (FLAG_IS_TERMINAL & flags) && (inputIndex == length)) { + if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) { const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq); } @@ -816,15 +817,15 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const { - const uint8_t* const root = DICT_ROOT; +int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const { + const uint8_t *const root = DICT_ROOT; int pos = BinaryFormat::getTerminalPosition(root, inWord, length, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == pos) { return NOT_A_PROBABILITY; } const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags)); + const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); if (hasMultipleChars) { pos = BinaryFormat::skipOtherCharacters(root, pos); } else { @@ -872,8 +873,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children) // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos); - const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags)); - const bool isTerminalNode = (0 != (FLAG_IS_TERMINAL & flags)); + const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); + const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags)); bool needsToInvokeOnTerminal = false; @@ -991,5 +992,4 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, *newChildrenPosition = childrenPos; return true; } - } // namespace latinime diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index ac14fc0bc..6083f0175 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -19,53 +19,19 @@ #include <map> #include <stdint.h> -#include "correction.h" -#include "correction_state.h" #include "defines.h" -#include "proximity_info.h" -#include "words_priority_queue.h" -#include "words_priority_queue_pool.h" namespace latinime { +class Correction; +class ProximityInfo; class TerminalAttributes; +class WordsPriorityQueuePool; + class UnigramDictionary { typedef struct { int first; int second; int replacement; } digraph_t; public: - // Mask and flags for children address type selection. - static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; - static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; - static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; - static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; - static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; - - // Flag for single/multiple char group - static const int FLAG_HAS_MULTIPLE_CHARS = 0x20; - - // Flag for terminal groups - static const int FLAG_IS_TERMINAL = 0x10; - - // Flag for shortcut targets presence - static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; - // Flag for bigram presence - static const int FLAG_HAS_BIGRAMS = 0x04; - - // Attribute (bigram/shortcut) related flags: - // Flag for presence of more attributes - static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; - // Flag for sign of offset. If this flag is set, the offset value must be negated. - static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; - - // Mask for attribute frequency, stored on 4 bits inside the flags byte. - static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F; - - // Mask and flags for attribute address type selection. - static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; - // Error tolerances static const int DEFAULT_MAX_ERRORS = 2; static const int MAX_ERRORS_FOR_TWO_WORDS = 1; @@ -73,9 +39,9 @@ class UnigramDictionary { static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0; static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1; static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; - UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, + UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - int getFrequency(const int32_t* const inWord, const int length) const; + int getFrequency(const int32_t *const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, @@ -92,14 +58,14 @@ class UnigramDictionary { const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const; int getDigraphReplacement(const int *codes, const int i, const int codesSize, - const digraph_t* const digraphs, const unsigned int digraphsSize) const; + const digraph_t *const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, - const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, + const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, Correction *correction, - WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs, + const bool useFullEditDistance, const int *codesSrc, const int codesRemain, + const int currentDepth, int *codesDest, Correction *correction, + WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, const unsigned int digraphsSize) const; void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, @@ -107,16 +73,16 @@ class UnigramDictionary { void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool) const; + Correction *correction, WordsPriorityQueuePool *queuePool) const; void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const; void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate) const; void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, @@ -128,25 +94,25 @@ class UnigramDictionary { const int currentWordIndex) const; int getMostFrequentWordLike(const int startInputIndex, const int inputLength, Correction *correction, unsigned short *word) const; - int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, + int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length, short unsigned int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool* queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputLength, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short* outputWord, int *outputWordLength) const; + int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, - const int outputWordLength, int *freqArray, int* wordLengthArray, - unsigned short* outputWord) const; + const int outputWordLength, int *freqArray, int *wordLengthArray, + unsigned short *outputWord) const; - const uint8_t* const DICT_ROOT; + const uint8_t *const DICT_ROOT; const int MAX_WORD_LENGTH; const int MAX_WORDS; const int TYPED_LETTER_MULTIPLIER; @@ -160,5 +126,4 @@ class UnigramDictionary { static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; }; } // namespace latinime - #endif // LATINIME_UNIGRAM_DICTIONARY_H diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 9c6d28d60..c0dedb59d 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -18,8 +18,9 @@ #define LATINIME_WORDS_PRIORITY_QUEUE_H #include <cstring> // for memcpy() -#include <iostream> #include <queue> + +#include "correction.h" #include "defines.h" namespace latinime { @@ -33,7 +34,7 @@ class WordsPriorityQueue { int mWordLength; bool mUsed; - void setParams(int score, unsigned short* word, int wordLength) { + void setParams(int score, unsigned short *word, int wordLength) { mScore = score; mWordLength = wordLength; memcpy(mWord, word, sizeof(unsigned short) * wordLength); @@ -55,8 +56,8 @@ class WordsPriorityQueue { delete[] mSuggestedWords; } - void push(int score, unsigned short* word, int wordLength) { - SuggestedWord* sw = 0; + void push(int score, unsigned short *word, int wordLength) { + SuggestedWord *sw = 0; if (mSuggestions.size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; @@ -86,21 +87,21 @@ class WordsPriorityQueue { } } - SuggestedWord* top() { + SuggestedWord *top() { if (mSuggestions.empty()) return 0; - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); return sw; } - int outputSuggestions(const unsigned short* before, const int beforeLength, + int outputSuggestions(const unsigned short *before, const int beforeLength, int *frequencies, unsigned short *outputChars) { mHighestSuggestedWord = 0; const unsigned int size = min( MAX_WORDS, static_cast<unsigned int>(mSuggestions.size())); - SuggestedWord* swBuffer[size]; + SuggestedWord *swBuffer[size]; int index = size - 1; while (!mSuggestions.empty() && index >= 0) { - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { AKLOGI("dump word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); @@ -110,11 +111,11 @@ class WordsPriorityQueue { --index; } if (size >= 2) { - SuggestedWord* nsMaxSw = 0; + SuggestedWord *nsMaxSw = 0; unsigned int maxIndex = 0; float maxNs = 0; for (unsigned int i = 0; i < size; ++i) { - SuggestedWord* tempSw = swBuffer[i]; + SuggestedWord *tempSw = swBuffer[i]; if (!tempSw) { continue; } @@ -131,13 +132,13 @@ class WordsPriorityQueue { } } for (unsigned int i = 0; i < size; ++i) { - SuggestedWord* sw = swBuffer[i]; + SuggestedWord *sw = swBuffer[i]; if (!sw) { AKLOGE("SuggestedWord is null %d", i); continue; } const unsigned int wordLength = sw->mWordLength; - char* targetAdr = (char*) outputChars + i * MAX_WORD_LENGTH * sizeof(short); + char *targetAdr = (char*) outputChars + i * MAX_WORD_LENGTH * sizeof(short); frequencies[i] = sw->mScore; memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short)); if (wordLength < MAX_WORD_LENGTH) { @@ -155,7 +156,7 @@ class WordsPriorityQueue { void clear() { mHighestSuggestedWord = 0; while (!mSuggestions.empty()) { - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { AKLOGI("Clear word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); @@ -172,8 +173,8 @@ class WordsPriorityQueue { DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); } - float getHighestNormalizedScore(const unsigned short* before, const int beforeLength, - unsigned short** outWord, int *outScore, int *outLength) { + float getHighestNormalizedScore(const unsigned short *before, const int beforeLength, + unsigned short **outWord, int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } @@ -189,7 +190,7 @@ class WordsPriorityQueue { } }; - SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word, int wordLength) { for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { @@ -200,10 +201,10 @@ class WordsPriorityQueue { return 0; } - static float getNormalizedScore(SuggestedWord* sw, const unsigned short* before, - const int beforeLength, unsigned short** outWord, int *outScore, int *outLength) { + static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before, + const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) { const int score = sw->mScore; - unsigned short* word = sw->mWord; + unsigned short *word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; @@ -223,9 +224,8 @@ class WordsPriorityQueue { Suggestions mSuggestions; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; - SuggestedWord* mSuggestedWords; - SuggestedWord* mHighestSuggestedWord; + SuggestedWord *mSuggestedWords; + SuggestedWord *mHighestSuggestedWord; }; -} - +} // namespace latinime #endif // LATINIME_WORDS_PRIORITY_QUEUE_H diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index b4e2bed26..38887291e 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -17,8 +17,7 @@ #ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H #define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H -#include <assert.h> -#include <new> +#include <cassert> #include "words_priority_queue.h" namespace latinime { @@ -44,11 +43,11 @@ class WordsPriorityQueuePool { } } - WordsPriorityQueue* getMasterQueue() { + WordsPriorityQueue *getMasterQueue() { return mMasterQueue; } - WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { + WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) { if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { return 0; } @@ -70,7 +69,7 @@ class WordsPriorityQueuePool { inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = getSubQueue(wordIndex, i); + WordsPriorityQueue *queue = getSubQueue(wordIndex, i); if (queue) { queue->clear(); } @@ -86,12 +85,11 @@ class WordsPriorityQueuePool { private: DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueuePool); - WordsPriorityQueue* mMasterQueue; - WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; }; -} - +} // namespace latinime #endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |