aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/additional_proximity_chars.cpp6
-rw-r--r--native/jni/src/additional_proximity_chars.h32
-rw-r--r--native/jni/src/basechars.cpp8
-rw-r--r--native/jni/src/bigram_dictionary.cpp155
-rw-r--r--native/jni/src/bigram_dictionary.h30
-rw-r--r--native/jni/src/binary_format.h242
-rw-r--r--native/jni/src/bloom_filter.h16
-rw-r--r--native/jni/src/char_utils.cpp15
-rw-r--r--native/jni/src/char_utils.h22
-rw-r--r--native/jni/src/correction.cpp300
-rw-r--r--native/jni/src/correction.h195
-rw-r--r--native/jni/src/correction_state.h1
-rw-r--r--native/jni/src/debug.h72
-rw-r--r--native/jni/src/defines.h230
-rw-r--r--native/jni/src/dic_traverse_wrapper.cpp26
-rw-r--r--native/jni/src/dic_traverse_wrapper.h67
-rw-r--r--native/jni/src/dictionary.cpp102
-rw-r--r--native/jni/src/dictionary.h88
-rw-r--r--native/jni/src/geometry_utils.h93
-rw-r--r--native/jni/src/gesture/gesture_decoder_wrapper.cpp22
-rw-r--r--native/jni/src/gesture/gesture_decoder_wrapper.h70
-rw-r--r--native/jni/src/gesture/incremental_decoder_interface.h41
-rw-r--r--native/jni/src/gesture/incremental_decoder_wrapper.cpp22
-rw-r--r--native/jni/src/gesture/incremental_decoder_wrapper.h71
-rw-r--r--native/jni/src/hash_map_compat.h34
-rw-r--r--native/jni/src/proximity_info.cpp422
-rw-r--r--native/jni/src/proximity_info.h169
-rw-r--r--native/jni/src/proximity_info_state.cpp542
-rw-r--r--native/jni/src/proximity_info_state.h294
-rw-r--r--native/jni/src/terminal_attributes.h44
-rw-r--r--native/jni/src/unigram_dictionary.cpp405
-rw-r--r--native/jni/src/unigram_dictionary.h127
-rw-r--r--native/jni/src/words_priority_queue.h81
-rw-r--r--native/jni/src/words_priority_queue_pool.h30
34 files changed, 2693 insertions, 1381 deletions
diff --git a/native/jni/src/additional_proximity_chars.cpp b/native/jni/src/additional_proximity_chars.cpp
index 224f020f2..f59492741 100644
--- a/native/jni/src/additional_proximity_chars.cpp
+++ b/native/jni/src/additional_proximity_chars.cpp
@@ -17,7 +17,9 @@
#include "additional_proximity_chars.h"
namespace latinime {
-const std::string AdditionalProximityChars::LOCALE_EN_US("en");
+// TODO: Stop using hardcoded additional proximity characters.
+// TODO: Have proximity character informations in each language's binary dictionary.
+const char *AdditionalProximityChars::LOCALE_EN_US = "en";
const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_A[EN_US_ADDITIONAL_A_SIZE] = {
'e', 'i', 'o', 'u'
@@ -38,4 +40,4 @@ const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SI
const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] = {
'a', 'e', 'i', 'o'
};
-}
+} // namespace latinime
diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h
index e0ecc0e1d..d420c4664 100644
--- a/native/jni/src/additional_proximity_chars.h
+++ b/native/jni/src/additional_proximity_chars.h
@@ -17,8 +17,8 @@
#ifndef LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
#define LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
+#include <cstring>
#include <stdint.h>
-#include <string>
#include "defines.h"
@@ -26,7 +26,8 @@ namespace latinime {
class AdditionalProximityChars {
private:
- static const std::string LOCALE_EN_US;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(AdditionalProximityChars);
+ static const char *LOCALE_EN_US;
static const int EN_US_ADDITIONAL_A_SIZE = 4;
static const int32_t EN_US_ADDITIONAL_A[];
static const int EN_US_ADDITIONAL_E_SIZE = 4;
@@ -38,17 +39,18 @@ class AdditionalProximityChars {
static const int EN_US_ADDITIONAL_U_SIZE = 4;
static const int32_t EN_US_ADDITIONAL_U[];
- static bool isEnLocale(const std::string *locale_str) {
- return locale_str && locale_str->size() >= LOCALE_EN_US.size()
- && LOCALE_EN_US.compare(0, LOCALE_EN_US.size(), *locale_str);
+ static bool isEnLocale(const char *localeStr) {
+ const size_t LOCALE_EN_US_SIZE = strlen(LOCALE_EN_US);
+ return localeStr && strlen(localeStr) >= LOCALE_EN_US_SIZE
+ && strncmp(localeStr, LOCALE_EN_US, LOCALE_EN_US_SIZE) == 0;
}
public:
- static int getAdditionalCharsSize(const std::string* locale_str, const int32_t c) {
- if (!isEnLocale(locale_str)) {
+ static int getAdditionalCharsSize(const char *localeStr, const int32_t c) {
+ if (!isEnLocale(localeStr)) {
return 0;
}
- switch(c) {
+ switch (c) {
case 'a':
return EN_US_ADDITIONAL_A_SIZE;
case 'e':
@@ -64,11 +66,11 @@ class AdditionalProximityChars {
}
}
- static const int32_t* getAdditionalChars(const std::string *locale_str, const int32_t c) {
- if (!isEnLocale(locale_str)) {
+ static const int32_t *getAdditionalChars(const char *localeStr, const int32_t c) {
+ if (!isEnLocale(localeStr)) {
return 0;
}
- switch(c) {
+ switch (c) {
case 'a':
return EN_US_ADDITIONAL_A;
case 'e':
@@ -83,12 +85,6 @@ class AdditionalProximityChars {
return 0;
}
}
-
- static bool hasAdditionalChars(const std::string *locale_str, const int32_t c) {
- return getAdditionalCharsSize(locale_str, c) > 0;
- }
};
-
-}
-
+} // namespace latinime
#endif // LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp
index 31f1e18a8..379cb6226 100644
--- a/native/jni/src/basechars.cpp
+++ b/native/jni/src/basechars.cpp
@@ -14,17 +14,19 @@
* limitations under the License.
*/
+#include <stdint.h>
+
#include "char_utils.h"
namespace latinime {
-/**
+/*
* Table mapping most combined Latin, Greek, and Cyrillic characters
* to their base characters. If c is in range, BASE_CHARS[c] == c
* if c is not a combined character, or the base character if it
* is combined.
*/
-const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
+const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
@@ -187,8 +189,6 @@ const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
};
-
// generated with:
// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-
} // namespace latinime
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 9ef024dc4..dade4f16b 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -1,21 +1,20 @@
/*
-**
-** Copyright 2010, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
-
-#include <string.h>
+ * Copyright (C) 2010, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
@@ -27,9 +26,8 @@
namespace latinime {
-BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
- Dictionary *parentDictionary)
- : DICT(dict), MAX_WORD_LENGTH(maxWordLength), mParentDictionary(parentDictionary) {
+BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions)
+ : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_PREDICTIONS(maxPredictions) {
if (DEBUG_DICT) {
AKLOGI("BigramDictionary - constructor");
}
@@ -38,7 +36,8 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
BigramDictionary::~BigramDictionary() {
}
-bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency) {
+bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
+ int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
word[length] = 0;
if (DEBUG_DICT) {
#ifdef FLAG_DBG
@@ -50,25 +49,26 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
// Find the right insertion point
int insertAt = 0;
- while (insertAt < mMaxBigrams) {
- if (frequency > mBigramFreq[insertAt] || (mBigramFreq[insertAt] == frequency
- && length < Dictionary::wideStrLen(mBigramChars + insertAt * MAX_WORD_LENGTH))) {
+ while (insertAt < MAX_PREDICTIONS) {
+ if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
+ && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
break;
}
insertAt++;
}
if (DEBUG_DICT) {
- AKLOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
+ AKLOGI("Bigram: InsertAt -> %d MAX_PREDICTIONS: %d", insertAt, MAX_PREDICTIONS);
}
- if (insertAt < mMaxBigrams) {
- memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
- (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
- (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
- mBigramFreq[insertAt] = frequency;
- memmove((char*) mBigramChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
- (char*) mBigramChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short),
- (mMaxBigrams - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
- unsigned short *dest = mBigramChars + (insertAt ) * MAX_WORD_LENGTH;
+ if (insertAt < MAX_PREDICTIONS) {
+ memmove(bigramFreq + (insertAt + 1),
+ bigramFreq + insertAt,
+ (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
+ bigramFreq[insertAt] = frequency;
+ outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
+ memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
+ bigramChars + insertAt * MAX_WORD_LENGTH,
+ (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
+ unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
while (length--) {
*dest++ = *word++;
}
@@ -84,12 +84,11 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
/* Parameters :
* prevWord: the word before, the one for which we need to look up bigrams.
* prevWordLength: its length.
- * codes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
+ * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
* codesSize: the size of the codes array.
* bigramChars: an array for output, at the same format as outwords for getSuggestions.
* bigramFreq: an array to output frequencies.
- * maxWordLength: the maximum size of a word.
- * maxBigrams: the maximum number of bigrams fitting in the bigramChars array.
+ * outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
* Note: this is not the number of bigrams output in the array, which is the number of
* bigrams this word has WHOSE first letter also matches the letter the user typed.
@@ -98,21 +97,23 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
-int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes,
- int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
- int maxBigrams) {
+int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
+ int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
- mBigramFreq = bigramFreq;
- mBigramChars = bigramChars;
- mInputCodes = codes;
- mMaxBigrams = maxBigrams;
- const uint8_t* const root = DICT;
- int pos = getBigramListPositionForWord(prevWord, prevWordLength);
+ const uint8_t *const root = DICT;
+ int pos = getBigramListPositionForWord(prevWord, prevWordLength,
+ false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
+ if (0 == pos) {
+ // If no bigrams for this exact word, search again in lower case.
+ pos = getBigramListPositionForWord(prevWord, prevWordLength,
+ true /* forceLowerCaseSearch */);
+ }
+ // If still no bigrams, we really don't have them!
if (0 == pos) return 0;
- int bigramFlags;
+ uint8_t bigramFlags;
int bigramCount = 0;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
@@ -124,36 +125,38 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
bigramBuffer, &unigramFreq);
// codesSize == 0 means we are trying to find bigram predictions.
- if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
- const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) {
+ const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
// Due to space constraints, the frequency for bigrams is approximate - the lower the
// unigram frequency, the worse the precision. The theoritical maximum error in
// resulting frequency is 8 - although in the practice it's never bigger than 3 or 4
// in very bad cases. This means that sometimes, we'll see some bigrams interverted
// here, but it can't get too bad.
const int frequency =
- BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
- if (addWordBigram(bigramBuffer, length, frequency)) {
+ BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
+ if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
+ outputTypes)) {
++bigramCount;
}
}
- } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+ } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
return bigramCount;
}
// Returns a pointer to the start of the bigram list.
// If the word is not found or has no bigrams, this function returns 0.
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
- const int prevWordLength) {
+ const int prevWordLength, const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return 0;
- const uint8_t* const root = DICT;
- int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
+ const uint8_t *const root = DICT;
+ int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
+ forceLowerCaseSearch);
if (NOT_VALID_WORD == pos) return 0;
- const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- if (0 == (flags & UnigramDictionary::FLAG_HAS_BIGRAMS)) return 0;
- if (0 == (flags & UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS)) {
- BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
+ if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
+ BinaryFormat::getCodePointAndForwardPointer(root, &pos);
} else {
pos = BinaryFormat::skipOtherCharacters(root, pos);
}
@@ -164,28 +167,33 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
}
void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
- const int prevWordLength, std::map<int, int> *map, uint8_t *filter) {
+ const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
- const uint8_t* const root = DICT;
- int pos = getBigramListPositionForWord(prevWord, prevWordLength);
+ const uint8_t *const root = DICT;
+ int pos = getBigramListPositionForWord(prevWord, prevWordLength,
+ false /* forceLowerCaseSearch */);
+ if (0 == pos) {
+ // If no bigrams for this exact string, search again in lower case.
+ pos = getBigramListPositionForWord(prevWord, prevWordLength,
+ true /* forceLowerCaseSearch */);
+ }
if (0 == pos) return;
- int bigramFlags;
+ uint8_t bigramFlags;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
(*map)[bigramPos] = frequency;
setInFilter(filter, bigramPos);
- } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+ } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
}
-bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
+bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
- int *inputCodes = mInputCodes;
int maxAlt = MAX_ALTERNATIVES;
const unsigned short firstBaseChar = toBaseLowerCase(*word);
while (maxAlt > 0) {
@@ -199,14 +207,15 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
}
bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
- int length2) {
- const uint8_t* const root = DICT;
- int pos = getBigramListPositionForWord(word1, length1);
+ int length2) const {
+ const uint8_t *const root = DICT;
+ int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
- int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
+ int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2,
+ false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == nextWordPos) return false;
- int bigramFlags;
+ uint8_t bigramFlags;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
@@ -214,7 +223,7 @@ bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const in
if (bigramPos == nextWordPos) {
return true;
}
- } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+ } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
return false;
}
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index b8763a515..5f11ae822 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -24,39 +24,33 @@
namespace latinime {
-class Dictionary;
class BigramDictionary {
public:
- BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
- int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
- int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
+ BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
+ int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
+ unsigned short *outWords, int *frequencies, int *outputTypes) const;
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
- std::map<int, int> *map, uint8_t *filter);
- bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
+ std::map<int, int> *map, uint8_t *filter) const;
+ bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
~BigramDictionary();
private:
- bool addWordBigram(unsigned short *word, int length, int frequency);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
+ bool addWordBigram(unsigned short *word, int length, int frequency,
+ int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
void searchForTerminalNode(int addressLookingFor, int frequency);
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
- bool checkFirstCharacter(unsigned short *word);
+ bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
+ int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
+ const bool forceLowerCaseSearch) const;
const unsigned char *DICT;
const int MAX_WORD_LENGTH;
+ const int MAX_PREDICTIONS;
// TODO: Re-implement proximity correction for bigram correction
static const int MAX_ALTERNATIVES = 1;
-
- Dictionary *mParentDictionary;
- int *mBigramFreq;
- int mMaxBigrams;
- unsigned short *mBigramChars;
- int *mInputCodes;
- int mInputLength;
};
-
} // namespace latinime
-
#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 51bf8ebbc..eec52e323 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -18,18 +18,53 @@
#define LATINIME_BINARY_FORMAT_H
#include <limits>
+#include <map>
#include "bloom_filter.h"
-#include "unigram_dictionary.h"
+#include "char_utils.h"
namespace latinime {
class BinaryFormat {
- private:
- const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
- const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
- const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-
public:
+ // Mask and flags for children address type selection.
+ static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
+ static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+ static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+ static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+ static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+
+ // Flag for single/multiple char group
+ static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
+
+ // Flag for terminal groups
+ static const int FLAG_IS_TERMINAL = 0x10;
+
+ // Flag for shortcut targets presence
+ static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+ // Flag for bigram presence
+ static const int FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for non-words (typically, shortcut only entries)
+ static const int FLAG_IS_NOT_A_WORD = 0x02;
+ // Flag for blacklist
+ static const int FLAG_IS_BLACKLISTED = 0x01;
+
+ // Attribute (bigram/shortcut) related flags:
+ // Flag for presence of more attributes
+ static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+ // Flag for sign of offset. If this flag is set, the offset value must be negated.
+ static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+
+ // Mask for attribute frequency, stored on 4 bits inside the flags byte.
+ static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
+ // The numeric value of the shortcut frequency that means 'whitelist'.
+ static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
+
+ // Mask and flags for attribute address type selection.
+ static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+
const static int UNKNOWN_FORMAT = -1;
// Originally, format version 1 had a 16-bit magic number, then the version number `01'
// then options that must be 0. Hence the first 32-bits of the format are always as follow
@@ -44,29 +79,29 @@ class BinaryFormat {
const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
const static int SHORTCUT_LIST_SIZE_SIZE = 2;
- static int detectFormat(const uint8_t* const dict);
- static unsigned int getHeaderSize(const uint8_t* const dict);
- static unsigned int getFlags(const uint8_t* const dict);
- static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos);
- static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos);
- static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos);
- static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos);
- static int skipOtherCharacters(const uint8_t* const dict, const int pos);
+ static int detectFormat(const uint8_t *const dict);
+ static unsigned int getHeaderSize(const uint8_t *const dict);
+ static unsigned int getFlags(const uint8_t *const dict);
+ static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
+ static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
+ static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
+ static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
+ static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
static int skipFrequency(const uint8_t flags, const int pos);
- static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos);
- static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos);
- static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos);
- static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags,
+ static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
+ static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
+ static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos);
- static int readChildrenPosition(const uint8_t* const dict, const uint8_t flags, const int pos);
+ static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
static bool hasChildrenInFlags(const uint8_t flags);
- static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
+ static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
- static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
- const int length);
- static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
- uint16_t* outWord, int* outUnigramFrequency);
+ static int getAttributeFrequencyFromFlags(const int flags);
+ static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
+ const int length, const bool forceLowerCaseSearch);
+ static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
+ uint16_t *outWord, int *outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -79,9 +114,16 @@ class BinaryFormat {
REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
};
const static unsigned int NO_FLAGS = 0;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
+ const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+ const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
+ const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+ static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
};
-inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
+inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
// The magic number is stored big-endian.
const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
switch (magicNumber) {
@@ -103,7 +145,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
}
}
-inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) {
+inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) {
switch (detectFormat(dict)) {
case 1:
return NO_FLAGS;
@@ -112,7 +154,7 @@ inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) {
}
}
-inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) {
+inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
switch (detectFormat(dict)) {
case 1:
return FORMAT_VERSION_1_HEADER_SIZE;
@@ -124,41 +166,41 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) {
}
}
-inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) {
const int msb = dict[(*pos)++];
if (msb < 0x80) return msb;
return ((msb & 0x7F) << 8) | dict[(*pos)++];
}
-inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) {
return dict[(*pos)++];
}
-inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
const int origin = *pos;
- const int32_t character = dict[origin];
- if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
- if (character == CHARACTER_ARRAY_TERMINATOR) {
+ const int32_t codePoint = dict[origin];
+ if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
+ if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1;
- return NOT_A_CHARACTER;
+ return NOT_A_CODE_POINT;
} else {
*pos = origin + 3;
- const int32_t char_1 = character << 16;
+ const int32_t char_1 = codePoint << 16;
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2];
}
} else {
*pos = origin + 1;
- return character;
+ return codePoint;
}
}
-inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t* const dict,
+inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict,
const int pos) {
return dict[pos];
}
-inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const int pos) {
+inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
int currentPos = pos;
int32_t character = dict[currentPos++];
while (CHARACTER_ARRAY_TERMINATOR != character) {
@@ -172,22 +214,22 @@ inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const in
static inline int attributeAddressSize(const uint8_t flags) {
static const int ATTRIBUTE_ADDRESS_SHIFT = 4;
- return (flags & UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+ return (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
/* Note: this is a value-dependant optimization of what may probably be
more readably written this way:
- switch (flags * UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
+ switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
default: return 0;
}
*/
}
-static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) {
+static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) {
int currentPos = pos;
uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
- while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) {
+ while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
currentPos += attributeAddressSize(flags);
flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
}
@@ -197,11 +239,11 @@ static inline int skipExistingBigrams(const uint8_t* const dict, const int pos)
static inline int childrenAddressSize(const uint8_t flags) {
static const int CHILDREN_ADDRESS_SHIFT = 6;
- return (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
+ return (BinaryFormat::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
/* See the note in attributeAddressSize. The same applies here */
}
-static inline int shortcutByteSize(const uint8_t* const dict, const int pos) {
+static inline int shortcutByteSize(const uint8_t *const dict, const int pos) {
return ((int)(dict[pos] << 8)) + (dict[pos + 1]);
}
@@ -210,28 +252,28 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos
}
inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
- return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
+ return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
}
-inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
const int pos) {
- if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
+ if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
return pos + shortcutByteSize(dict, pos);
} else {
return pos;
}
}
-inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
const int pos) {
- if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
+ if (FLAG_HAS_BIGRAMS & flags) {
return skipExistingBigrams(dict, pos);
} else {
return pos;
}
}
-inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos) {
// This function skips all attributes: shortcuts and bigrams.
int newPos = pos;
@@ -240,7 +282,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint
return newPos;
}
-inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
+inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
const uint8_t flags, const int pos) {
int currentPos = pos;
currentPos = skipChildrenPosition(flags, currentPos);
@@ -248,18 +290,18 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
return currentPos;
}
-inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags,
const int pos) {
int offset = 0;
- switch (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) {
- case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ switch (MASK_GROUP_ADDRESS_TYPE & flags) {
+ case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
offset = dict[pos];
break;
- case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
offset = dict[pos] << 8;
offset += dict[pos + 1];
break;
- case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
offset = dict[pos] << 16;
offset += dict[pos + 1] << 8;
offset += dict[pos + 2];
@@ -273,74 +315,77 @@ inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const u
}
inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
- return (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
- != (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
+ return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}
-inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* const dict,
+inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
const uint8_t flags, int *pos) {
int offset = 0;
const int origin = *pos;
- switch (UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
offset = dict[origin];
*pos = origin + 1;
break;
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
offset = dict[origin] << 8;
offset += dict[origin + 1];
*pos = origin + 2;
break;
- case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
offset = dict[origin] << 16;
offset += dict[origin + 1] << 8;
offset += dict[origin + 2];
*pos = origin + 3;
break;
}
- if (UnigramDictionary::FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
+ if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
return origin - offset;
} else {
return origin + offset;
}
}
+inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
+ return flags & MASK_ATTRIBUTE_FREQUENCY;
+}
+
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
-inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
- const int32_t* const inWord, const int length) {
+inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
+ const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
int pos = 0;
int wordPos = 0;
while (true) {
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
- if (wordPos > length) return NOT_VALID_WORD;
+ if (wordPos >= length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
- const int32_t wChar = inWord[wordPos];
+ const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
// If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match.
if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+ int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) {
// This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is
// no match and we can return NOT_VALID_WORD. So we will check all the characters
// in this character group indeed does match.
- if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
- character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- while (NOT_A_CHARACTER != character) {
+ if (FLAG_HAS_MULTIPLE_CHARS & flags) {
+ character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ while (NOT_A_CODE_POINT != character) {
++wordPos;
// If we shoot the length of the word we search for, or if we find a single
// character that does not match, as explained above, it means the word is
// not in the dictionary (by virtue of this chargroup being the only one to
// match the word on the first character, but not matching the whole word).
- if (wordPos > length) return NOT_VALID_WORD;
+ if (wordPos >= length) return NOT_VALID_WORD;
if (inWord[wordPos] != character) return NOT_VALID_WORD;
- character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+ character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
}
// If we come here we know that so far, we do match. Either we are on a terminal
@@ -348,14 +393,13 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
// If we don't match the length AND don't have children, then a word in the
// dictionary fully matches a prefix of the searched word but not the full word.
++wordPos;
- if (UnigramDictionary::FLAG_IS_TERMINAL & flags) {
+ if (FLAG_IS_TERMINAL & flags) {
if (wordPos == length) {
return charGroupPos;
}
- pos = BinaryFormat::skipFrequency(UnigramDictionary::FLAG_IS_TERMINAL, pos);
+ pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
}
- if (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
- == (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)) {
+ if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
return NOT_VALID_WORD;
}
// We have children and we are still shorter than the word we are searching for, so
@@ -365,7 +409,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
break;
} else {
// This chargroup does not match, so skip the remaining part and go to the next.
- if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+ if (FLAG_HAS_MULTIPLE_CHARS & flags) {
pos = BinaryFormat::skipOtherCharacters(root, pos);
}
pos = BinaryFormat::skipFrequency(flags, pos);
@@ -394,8 +438,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
* outUnigramFrequency: a pointer to an int to write the frequency into.
* Return value : the length of the word, of 0 if the word was not found.
*/
-inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
- const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) {
+inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
+ const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@@ -413,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
--charGroupCount) {
const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
- const int32_t character = getCharCodeAndForwardPointer(root, &pos);
+ const int32_t character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return
// the length.
outWord[wordPos] = character;
- if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
- int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
+ if (FLAG_HAS_MULTIPLE_CHARS & flags) {
+ int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
- while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
+ while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
- nextChar = getCharCodeAndForwardPointer(root, &pos);
+ nextChar = getCodePointAndForwardPointer(root, &pos);
}
}
*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
@@ -433,7 +477,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
}
// We need to skip past this char group, so skip any remaining chars after the
// first and possibly the frequency.
- if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+ if (FLAG_HAS_MULTIPLE_CHARS & flags) {
pos = skipOtherCharacters(root, pos);
}
pos = skipFrequency(flags, pos);
@@ -441,8 +485,8 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
// The fact that this group has children is very important. Since we already know
// that this group does not match, if it has no children we know it is irrelevant
// to what we are searching for.
- const bool hasChildren = (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
- (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
+ const bool hasChildren = (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
+ (MASK_GROUP_ADDRESS_TYPE & flags));
// We will write in `found' whether we have passed the children address we are
// searching for. For example if we search for "beer", the children of b are less
// than the address we are searching for and the children of c are greater. When we
@@ -479,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
const int32_t lastChar =
- getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+ getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar;
- if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
+ if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
int32_t nextChar =
- getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+ getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
- nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+ nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
}
}
++wordPos;
@@ -538,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
// 0 for the bigram frequency represents the middle of the 16th step from the top,
// while a value of 15 represents the middle of the top step.
// See makedict.BinaryDictInputOutput for details.
- const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
- return (int)(unigramFreq + (bigramFreq + 1) * stepSize);
+ const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
+ return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
}
// This returns a probability in log space.
@@ -555,7 +599,5 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
return backoff(unigramFreq);
}
}
-
} // namespace latinime
-
#endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/bloom_filter.h b/native/jni/src/bloom_filter.h
index 7ae6a1fa4..bcce1f7ea 100644
--- a/native/jni/src/bloom_filter.h
+++ b/native/jni/src/bloom_filter.h
@@ -23,16 +23,16 @@
namespace latinime {
-static inline void setInFilter(uint8_t *filter, const int position) {
- const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
- filter[bucket >> 3] |= (1 << (bucket & 0x7));
+// TODO: uint32_t position
+static inline void setInFilter(uint8_t *filter, const int32_t position) {
+ const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
+ filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
}
-static inline bool isInFilter(const uint8_t *filter, const int position) {
- const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
- return filter[bucket >> 3] & (1 << (bucket & 0x7));
+// TODO: uint32_t position
+static inline bool isInFilter(const uint8_t *filter, const int32_t position) {
+ const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
+ return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7));
}
-
} // namespace latinime
-
#endif // LATINIME_BLOOM_FILTER_H
diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp
index a31a0632c..9d886da31 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -14,7 +14,9 @@
* limitations under the License.
*/
-#include <stdlib.h>
+#include <cstdlib>
+
+#include "char_utils.h"
namespace latinime {
@@ -883,17 +885,16 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
};
static int compare_pair_capital(const void *a, const void *b) {
- return (int)(*(unsigned short *)a)
- - (int)((struct LatinCapitalSmallPair*)b)->capital;
+ return static_cast<int>(*static_cast<const unsigned short *>(a))
+ - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
}
-unsigned short latin_tolower(unsigned short c) {
+unsigned short latin_tolower(const unsigned short c) {
struct LatinCapitalSmallPair *p =
- (struct LatinCapitalSmallPair *)bsearch(&c, SORTED_CHAR_MAP,
+ static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]),
sizeof(SORTED_CHAR_MAP[0]),
- compare_pair_capital);
+ compare_pair_capital));
return p ? p->small : c;
}
-
} // namespace latinime
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
index 607dc5195..b17f262ec 100644
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -17,21 +17,24 @@
#ifndef LATINIME_CHAR_UTILS_H
#define LATINIME_CHAR_UTILS_H
+#include <cctype>
+#include <stdint.h>
+
namespace latinime {
-inline static int isAsciiUpper(unsigned short c) {
- return c >= 'A' && c <= 'Z';
+inline static bool isAsciiUpper(unsigned short c) {
+ return isupper(static_cast<int>(c)) != 0;
}
inline static unsigned short toAsciiLower(unsigned short c) {
return c - 'A' + 'a';
}
-inline static int isAscii(unsigned short c) {
- return c <= 127;
+inline static bool isAscii(unsigned short c) {
+ return isascii(static_cast<int>(c)) != 0;
}
-unsigned short latin_tolower(unsigned short c);
+unsigned short latin_tolower(const unsigned short c);
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
@@ -41,7 +44,7 @@ unsigned short latin_tolower(unsigned short c);
*/
static const int BASE_CHARS_SIZE = 0x0500;
-extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
inline static unsigned short toBaseChar(unsigned short c) {
if (c < BASE_CHARS_SIZE) {
@@ -50,8 +53,7 @@ inline static unsigned short toBaseChar(unsigned short c) {
return c;
}
-inline static unsigned short toBaseLowerCase(unsigned short c) {
- c = toBaseChar(c);
+inline static unsigned short toLowerCase(const unsigned short c) {
if (isAsciiUpper(c)) {
return toAsciiLower(c);
} else if (isAscii(c)) {
@@ -60,6 +62,8 @@ inline static unsigned short toBaseLowerCase(unsigned short c) {
return latin_tolower(c);
}
+inline static unsigned short toBaseLowerCase(const unsigned short c) {
+ return toLowerCase(toBaseChar(c));
+}
} // namespace latinime
-
#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 99f5b92c1..49e3e3c8c 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -14,22 +14,22 @@
* limitations under the License.
*/
-#include <assert.h>
-#include <ctype.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstring>
#define LOG_TAG "LatinIME: correction.cpp"
#include "char_utils.h"
#include "correction.h"
#include "defines.h"
-#include "dictionary.h"
-#include "proximity_info.h"
+#include "proximity_info_state.h"
namespace latinime {
+class ProximityInfo;
+
/////////////////////////////
// edit distance funcitons //
/////////////////////////////
@@ -55,25 +55,25 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
- (void)c;
+ (void)c; // To suppress compiler warning
}
}
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
- const int inputLength, const unsigned short *output, const int outputLength) {
+ const int inputSize, const unsigned short *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
- // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
- // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
- // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
- int *const current = editDistanceTable + outputLength * (inputLength + 1);
- const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
+ // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
+ // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
+ // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize].
+ int *const current = editDistanceTable + outputLength * (inputSize + 1);
+ const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
current[0] = outputLength;
const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
- for (int i = 1; i <= inputLength; ++i) {
+ for (int i = 1; i <= inputSize; ++i) {
const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
@@ -84,42 +84,37 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
}
inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
- const int outputLength, const int inputLength) {
+ const int outputLength, const int inputSize) {
if (DEBUG_EDIT_DISTANCE) {
- AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength);
}
- return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
+ return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
}
//////////////////////
// inline functions //
//////////////////////
-static const char QUOTE = '\'';
+static const char SINGLE_QUOTE = '\'';
-inline bool Correction::isQuote(const unsigned short c) {
- const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
- return (c == QUOTE && userTypedChar != QUOTE);
+inline bool Correction::isSingleQuote(const unsigned short c) {
+ const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
+ return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
}
////////////////
// Correction //
////////////////
-Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
- : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
- initEditDistance(mEditDistanceTable);
-}
-
void Correction::resetCorrection() {
mTotalTraverseCount = 0;
}
-void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
+void Correction::initCorrection(const ProximityInfo *pi, const int inputSize,
const int maxDepth) {
mProximityInfo = pi;
- mInputLength = inputLength;
+ mInputSize = inputSize;
mMaxDepth = maxDepth;
- mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2;
// TODO: This is not supposed to be required. Check what's going wrong with
// editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
initEditDistance(mEditDistanceTable);
@@ -159,11 +154,13 @@ void Correction::checkState() {
if (mSkipPos >= 0) ++inputCount;
if (mExcessivePos >= 0) ++inputCount;
if (mTransposedPos >= 0) ++inputCount;
- // TODO: remove this assert
- assert(inputCount <= 1);
}
}
+bool Correction::sameAsTyped() {
+ return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
+}
+
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
@@ -171,26 +168,22 @@ int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wo
}
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
- return getFinalProbabilityInternal(probability, word, wordLength, mInputLength);
+ return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
}
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputLength) {
- return getFinalProbabilityInternal(probability, word, wordLength, inputLength);
+ int *wordLength, const int inputSize) {
+ return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
}
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputLength) {
+ int *wordLength, const int inputSize) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (outputIndex < MIN_SUGGEST_DEPTH) {
- return NOT_A_PROBABILITY;
- }
-
*word = mWord;
int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
- inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength);
+ inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
return finalProbability;
}
@@ -233,7 +226,7 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getInputIndex() {
+int Correction::getInputIndex() const {
return mInputIndex;
}
@@ -277,13 +270,13 @@ bool Correction::needsToPrune() const {
// TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
// Allow one char longer word for missing character
- || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
+ || (!mDoAutoCompletion && (mOutputIndex > mInputSize));
}
void Correction::addCharToCurrentWord(const int32_t c) {
mWord[mOutputIndex] = c;
- const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord();
- calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength,
+ const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
+ calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
mWord, mOutputIndex + 1);
}
@@ -308,13 +301,12 @@ Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
return UNRELATED;
}
-inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
- return type == ProximityInfo::EQUIVALENT_CHAR;
+inline bool isEquivalentChar(ProximityType type) {
+ return type == EQUIVALENT_CHAR;
}
-inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) {
- return type == ProximityInfo::EQUIVALENT_CHAR
- || type == ProximityInfo::NEAR_PROXIMITY_CHAR;
+inline bool isProximityCharOrEquivalentChar(ProximityType type) {
+ return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
}
Correction::CorrectionType Correction::processCharAndCalcState(
@@ -331,25 +323,25 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mDistances[mOutputIndex] = NOT_A_DISTANCE;
// Skip checking this node
- if (mNeedsToTraverseAllNodes || isQuote(c)) {
+ if (mNeedsToTraverseAllNodes || isSingleQuote(c)) {
bool incremented = false;
- if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
+ if (mLastCharExceeded && mInputIndex == mInputSize - 1) {
// TODO: Do not check the proximity if EditDistance exceeds the threshold
- const ProximityInfo::ProximityType matchId =
- mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex);
+ const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
+ mInputIndex, c, true, &proximityIndex);
if (isEquivalentChar(matchId)) {
mLastCharExceeded = false;
--mExcessiveCount;
mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
+ mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
+ } else if (matchId == NEAR_PROXIMITY_CHAR) {
mLastCharExceeded = false;
--mExcessiveCount;
++mProximityCount;
- mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
+ mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(
+ mInputIndex, proximityIndex);
}
- if (!isQuote(c)) {
+ if (!isSingleQuote(c)) {
incrementInputIndex();
incremented = true;
}
@@ -362,7 +354,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
}
- if (mExcessivePos < mInputLength - 1) {
+ if (mExcessivePos < mInputSize - 1) {
mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
}
}
@@ -370,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mSkipPos >= 0) {
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
if (DEBUG_DICT) {
- assert(mSkipPos == mOutputIndex - 1);
+ // TODO: Enable this assertion.
+ //assert(mSkipPos == mOutputIndex - 1);
}
mSkipPos = mOutputIndex;
}
@@ -381,14 +374,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
mTransposedPos = mOutputIndex;
}
- if (mTransposedPos < mInputLength - 1) {
+ if (mTransposedPos < mInputSize - 1) {
mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
}
}
bool secondTransposing = false;
if (mTransposedCount % 2 == 1) {
- if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
+ mInputIndex - 1, c, false))) {
++mTransposedCount;
secondTransposing = true;
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
@@ -399,7 +393,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else {
--mTransposedCount;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -417,20 +411,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
? (noCorrectionsHappenedSoFar || mProximityCount == 0)
: (noCorrectionsHappenedSoFar && mProximityCount == 0);
- ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
- ? ProximityInfo::EQUIVALENT_CHAR
- : mProximityInfo->getMatchedProximityId(
+ ProximityType matchedProximityCharId = secondTransposing
+ ? EQUIVALENT_CHAR
+ : mProximityInfoState.getMatchedProximityId(
mInputIndex, c, checkProximityChars, &proximityIndex);
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
- || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (UNRELATED_CHAR == matchedProximityCharId
+ || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (canTryCorrection && mOutputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
- && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
@@ -446,27 +440,27 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// Here, we are doing something equivalent to matchedProximityCharId,
// but we already know that "excessive char correction" just happened
// so that we just need to check "mProximityCount == 0".
- matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+ matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
mInputIndex, c, mProximityCount == 0, &proximityIndex);
}
}
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
- || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
- if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (UNRELATED_CHAR == matchedProximityCharId
+ || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
mAdditionalProximityMatching = true;
}
// TODO: Optimize
// As the current char turned out to be an unrelated char,
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
// here refers to the previous state.
- if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
+ if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
- && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion t->e
// Example:
// occaisional -> occa sional
@@ -478,7 +472,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion t->s
// Example:
// chcolate -> chocolate
@@ -490,28 +484,28 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion p->s
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
++mSkippedCount;
--mProximityCount;
return processSkipChar(c, isTerminal, false);
- } else if (mInputIndex - 1 < mInputLength
+ } else if (mInputIndex - 1 < mInputSize
&& mSkippedCount > 0
&& mCorrectionStates[mOutputIndex].mSkipping
&& mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
&& isProximityCharOrEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion s->a
incrementInputIndex();
--mSkippedCount;
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
- } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
+ } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// 1.2. Excessive or transpose correction
if (mTransposing) {
++mTransposedCount;
@@ -520,7 +514,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
incrementInputIndex();
}
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -536,20 +530,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// 3. Skip correction
++mSkippedCount;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return processSkipChar(c, isTerminal, false);
- } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
// As a last resort, use additional proximity characters
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -557,7 +551,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
} else {
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -567,20 +561,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
- // If inputIndex is greater than mInputLength, that means there is no
+ // If inputIndex is greater than mInputSize, that means there is no
// proximity chars. So, we don't need to check proximity.
mMatching = true;
} else if (isEquivalentChar(matchedProximityCharId)) {
mMatching = true;
++mEquivalentCharCount;
- mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+ mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
+ } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
+ mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex);
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -592,8 +586,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// 4. Last char excessive correction
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
- && mProximityCount == 0 && (mInputIndex == mInputLength - 2);
- const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
+ && mProximityCount == 0 && (mInputIndex == mInputSize - 2);
+ const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded;
if (mLastCharExceeded) {
++mExcessiveCount;
}
@@ -604,7 +598,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
- mExceeding && mInputIndex == mInputLength - 2;
+ mExceeding && mInputIndex == mInputSize - 2;
// Finally, we are ready to go to the next character, the next "virtual node".
// We should advance the input index.
@@ -620,7 +614,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mTerminalInputIndex = mInputIndex - 1;
mTerminalOutputIndex = mOutputIndex - 1;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -634,13 +628,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
-Correction::~Correction() {
-}
-
-inline static int getQuoteCount(const unsigned short* word, const int length) {
+inline static int getQuoteCount(const unsigned short *word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
- if(word[i] == '\'') {
+ if (word[i] == SINGLE_QUOTE) {
++quoteCount;
}
}
@@ -657,12 +648,12 @@ inline static bool isUpperCase(unsigned short c) {
/* static */
int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
- const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction,
- const int inputLength) {
+ const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction,
+ const int inputSize) {
const int excessivePos = correction->getExcessivePos();
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
- const ProximityInfo *proximityInfo = correction->mProximityInfo;
+ const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState;
const int skippedCount = correction->mSkippedCount;
const int transposedCount = correction->mTransposedCount / 2;
const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
@@ -670,55 +661,55 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
const bool lastCharExceeded = correction->mLastCharExceeded;
const bool useFullEditDistance = correction->mUseFullEditDistance;
const int outputLength = outputIndex + 1;
- if (skippedCount >= inputLength || inputLength == 0) {
+ if (skippedCount >= inputSize || inputSize == 0) {
return -1;
}
// TODO: find more robust way
- bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
- : (inputLength == inputIndex + 1);
+ bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2)
+ : (inputSize == inputIndex + 1);
// TODO: use mExcessiveCount
- const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
+ const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
- const unsigned short* word = correction->mWord;
+ const unsigned short *word = correction->mWord;
const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
- - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
+ - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize));
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
if (DEBUG_DICT_FULL) {
- dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
+ dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength);
}
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
if (DEBUG_CORRECTION_FREQ
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
AKLOGI("FinalFreq0: %d", finalFreq);
}
// TODO: Optimize this.
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
- ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
- inputLength) - transposedCount;
+ ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength,
+ inputSize) - transposedCount;
const int matchWeight = powerIntCapped(typedLetterMultiplier,
- max(inputLength, outputLength) - ed);
+ max(inputSize, outputLength) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
// TODO: Demote further if there are two or more excessive chars with longer user input?
- if (inputLength > outputLength) {
+ if (inputSize > outputLength) {
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
}
ed = max(0, ed - quoteDiffCount);
- adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
+ adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)),
proximityMatchedCount);
if (transposedCount <= 0) {
- if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
+ if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) {
// Promote a word with just one skipped or excessive char
if (sameLength) {
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
@@ -737,8 +728,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(matchWeight, &finalFreq);
}
- if (proximityInfo->getMatchedProximityId(0, word[0], true)
- == ProximityInfo::UNRELATED_CHAR) {
+ if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
}
@@ -748,8 +738,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// Demotion for a word with missing character
if (skipped) {
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
- * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
- / (10 * inputLength
+ * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
+ / (10 * inputSize
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
AKLOGI("Demotion rate for missing character is %d.", demotionRate);
@@ -764,7 +754,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// Demotion for a word with excessive character
if (excessiveCount > 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
- if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
+ if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_DICT_FULL) {
AKLOGI("Double excessive demotion");
}
@@ -775,8 +765,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
}
const bool performTouchPositionCorrection =
- CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
- && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
+ CALIBRATE_SCORE_BY_TOUCH_COORDINATES
+ && proximityInfoState->touchPositionCorrectionEnabled()
+ && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
// Score calibration by touch coordinates is being done only for pure-fat finger typing error
// cases.
int additionalProximityCount = 0;
@@ -795,8 +786,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
static const float MIN = 0.3f;
static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
static const float R2 = HALF_SCORE_SQUARED_RADIUS;
- const float x = (float)squaredDistance
- / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
+ const float x = static_cast<float>(squaredDistance)
+ / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
const float factor = max((x < R1)
? (A * (R1 - x) + B * x) / R1
: (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
@@ -850,7 +841,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount);
multiplyRate(
- 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
+ 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq);
// Promotion for an exactly matched word
if (ed == 0) {
@@ -885,7 +876,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
e ... exceeding
p ... proximity matching
*/
- if (matchCount == inputLength && matchCount >= 2 && !skipped
+ if (matchCount == inputSize && matchCount >= 2 && !skipped
&& word[matchCount] == word[matchCount - 1]) {
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
}
@@ -895,8 +886,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(fullWordMultiplier, &finalFreq);
}
- if (useFullEditDistance && outputLength > inputLength + 1) {
- const int diff = outputLength - inputLength - 1;
+ if (useFullEditDistance && outputLength > inputSize + 1) {
+ const int diff = outputLength - inputSize - 1;
const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
}
@@ -906,8 +897,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
}
if (DEBUG_CORRECTION_FREQ
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
- DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength);
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
+ DUMP_WORD(correction->getPrimaryInputWord(), inputSize);
DUMP_WORD(correction->mWord, outputLength);
AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, additionalProximityCount,
@@ -920,7 +911,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
/* static */
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount,
- const Correction* correction, const bool isSpaceProximity, const unsigned short *word) {
+ const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
@@ -946,7 +937,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
int totalLength = 0;
int totalFreq = 0;
- for (int i = 0; i < wordCount; ++i){
+ for (int i = 0; i < wordCount; ++i) {
const int wordLength = wordLengthArray[i];
if (wordLength <= 0) {
return 0;
@@ -1050,10 +1041,10 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
/* Damerau-Levenshtein distance */
inline static int editDistanceInternal(
- int* editDistanceTable, const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength) {
+ int *editDistanceTable, const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
+ int *dp = editDistanceTable;
const int li = beforeLength + 1;
const int lo = afterLength + 1;
for (int i = 0; i < li; ++i) {
@@ -1089,8 +1080,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1];
}
-int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength) {
+int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength);
}
@@ -1099,7 +1090,7 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
// In dictionary.cpp, getSuggestion() method,
// suggestion scores are computed using the below formula.
// original score
-// := pow(mTypedLetterMultiplier (this is defined 2),
+// := powf(mTypedLetterMultiplier (this is defined 2),
// (the number of matched characters between typed word and suggested word))
// * (individual word's score which defined in the unigram dictionary,
// and this score is defined in range [0, 255].)
@@ -1111,15 +1102,15 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
// capitalization, then treat it as if the score was 255.
// - If before.length() == after.length()
// => multiply by mFullWordMultiplier (this is defined 2))
-// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
// the result.
-// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
/* static */
-float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength,
+float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength,
const int score) {
if (0 == beforeLength || 0 == afterLength) {
return 0;
@@ -1136,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be
return 0;
}
- const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
- * pow((float)TYPED_LETTER_MULTIPLIER,
- (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+ const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
+ : static_cast<float>(MAX_INITIAL_SCORE)
+ * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
+ static_cast<float>(min(beforeLength, afterLength - spaceCount)))
+ * static_cast<float>(FULL_WORD_MULTIPLIER);
// add a weight based on edit distance.
// distance <= max(afterLength, beforeLength) == afterLength,
// so, 0 <= distance / afterLength <= 1
- const float weight = 1.0 - (float) distance / afterLength;
- return (score / maxScore) * weight;
+ const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
+ return (static_cast<float>(score) / maxScore) * weight;
}
-
} // namespace latinime
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index 3300a8491..f016d5453 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -17,11 +17,13 @@
#ifndef LATINIME_CORRECTION_H
#define LATINIME_CORRECTION_H
-#include <assert.h>
+#include <cassert>
+#include <cstring> // for memset()
#include <stdint.h>
-#include "correction_state.h"
+#include "correction_state.h"
#include "defines.h"
+#include "proximity_info_state.h"
namespace latinime {
@@ -37,10 +39,108 @@ class Correction {
NOT_ON_TERMINAL
} CorrectionType;
+ Correction()
+ : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false),
+ mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0),
+ mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0),
+ mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0),
+ mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0),
+ mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0),
+ mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false),
+ mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false),
+ mSkipping(false), mProximityInfoState() {
+ memset(mWord, 0, sizeof(mWord));
+ memset(mDistances, 0, sizeof(mDistances));
+ memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable));
+ // NOTE: mCorrectionStates is an array of instances.
+ // No need to initialize it explicitly here.
+ }
+
+ virtual ~Correction() {}
+ void resetCorrection();
+ void initCorrection(
+ const ProximityInfo *pi, const int inputSize, const int maxWordLength);
+ void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
+
+ // TODO: remove
+ void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
+ const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
+ const bool doAutoCompletion, const int maxErrors);
+ void checkState();
+ bool sameAsTyped();
+ bool initProcessState(const int index);
+
+ int getInputIndex() const;
+
+ bool needsToPrune() const;
+
+ int pushAndGetTotalTraverseCount() {
+ return ++mTotalTraverseCount;
+ }
+
+ int getFreqForSplitMultipleWords(
+ const int *freqArray, const int *wordLengthArray, const int wordCount,
+ const bool isSpaceProximity, const unsigned short *word);
+ int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
+ int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
+ int *wordLength, const int inputSize);
+
+ CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
+
+ /////////////////////////
+ // Tree helper methods
+ int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
+
+ inline int getTreeSiblingPos(const int index) const {
+ return mCorrectionStates[index].mSiblingPos;
+ }
+
+ inline void setTreeSiblingPos(const int index, const int pos) {
+ mCorrectionStates[index].mSiblingPos = pos;
+ }
+
+ inline int getTreeParentIndex(const int index) const {
+ return mCorrectionStates[index].mParentIndex;
+ }
+
+ class RankingAlgorithm {
+ public:
+ static int calculateFinalProbability(const int inputIndex, const int depth,
+ const int probability, int *editDistanceTable, const Correction *correction,
+ const int inputSize);
+ static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
+ const int wordCount, const Correction *correction, const bool isSpaceProximity,
+ const unsigned short *word);
+ static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
+ const unsigned short *after, const int afterLength, const int score);
+ static int editDistance(const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength);
+ private:
+ static const int CODE_SPACE = ' ';
+ static const int MAX_INITIAL_SCORE = 255;
+ };
+
+ // proximity info state
+ void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
+ const int inputSize, const int *xCoordinates, const int *yCoordinates) {
+ mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
+ proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
+ }
+
+ const unsigned short *getPrimaryInputWord() const {
+ return mProximityInfoState.getPrimaryInputWord();
+ }
+
+ unsigned short getPrimaryCharAt(const int index) const {
+ return mProximityInfoState.getPrimaryCharAt(index);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Correction);
+
/////////////////////////
// static inline utils //
/////////////////////////
-
static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
@@ -93,112 +193,45 @@ class Correction {
}
}
- Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
- void resetCorrection();
- void initCorrection(
- const ProximityInfo *pi, const int inputLength, const int maxWordLength);
- void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
-
- // TODO: remove
- void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
- const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
- const bool doAutoCompletion, const int maxErrors);
- void checkState();
- bool initProcessState(const int index);
-
- int getInputIndex();
-
- virtual ~Correction();
- int getSpaceProximityPos() const {
+ inline int getSpaceProximityPos() const {
return mSpaceProximityPos;
}
- int getMissingSpacePos() const {
+ inline int getMissingSpacePos() const {
return mMissingSpacePos;
}
- int getSkipPos() const {
+ inline int getSkipPos() const {
return mSkipPos;
}
- int getExcessivePos() const {
+ inline int getExcessivePos() const {
return mExcessivePos;
}
- int getTransposedPos() const {
+ inline int getTransposedPos() const {
return mTransposedPos;
}
- bool needsToPrune() const;
-
- int pushAndGetTotalTraverseCount() {
- return ++mTotalTraverseCount;
- }
-
- int getFreqForSplitMultipleWords(
- const int *freqArray, const int *wordLengthArray, const int wordCount,
- const bool isSpaceProximity, const unsigned short *word);
- int getFinalProbability(const int probability, unsigned short **word, int* wordLength);
- int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int* wordLength, const int inputLength);
-
- CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
-
- /////////////////////////
- // Tree helper methods
- int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
-
- inline int getTreeSiblingPos(const int index) const {
- return mCorrectionStates[index].mSiblingPos;
- }
-
- inline void setTreeSiblingPos(const int index, const int pos) {
- mCorrectionStates[index].mSiblingPos = pos;
- }
-
- inline int getTreeParentIndex(const int index) const {
- return mCorrectionStates[index].mParentIndex;
- }
-
- class RankingAlgorithm {
- public:
- static int calculateFinalProbability(const int inputIndex, const int depth,
- const int probability, int *editDistanceTable, const Correction* correction,
- const int inputLength);
- static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
- const int wordCount, const Correction* correction, const bool isSpaceProximity,
- const unsigned short *word);
- static float calcNormalizedScore(const unsigned short* before, const int beforeLength,
- const unsigned short* after, const int afterLength, const int score);
- static int editDistance(const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength);
- private:
- static const int CODE_SPACE = ' ';
- static const int MAX_INITIAL_SCORE = 255;
- static const int TYPED_LETTER_MULTIPLIER = 2;
- static const int FULL_WORD_MULTIPLIER = 2;
- };
-
- private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline void startToTraverseAllNodes();
- inline bool isQuote(const unsigned short c);
+ inline bool isSingleQuote(const unsigned short c);
inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c);
inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
- int* wordLength, const int inputLength);
+ int *wordLength, const int inputSize);
- const int TYPED_LETTER_MULTIPLIER;
- const int FULL_WORD_MULTIPLIER;
+ static const int TYPED_LETTER_MULTIPLIER = 2;
+ static const int FULL_WORD_MULTIPLIER = 2;
const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance;
bool mDoAutoCompletion;
int mMaxEditDistance;
int mMaxDepth;
- int mInputLength;
+ int mInputSize;
int mSpaceProximityPos;
int mMissingSpacePos;
int mTerminalInputIndex;
@@ -240,7 +273,7 @@ class Correction {
bool mExceeding;
bool mTransposing;
bool mSkipping;
-
+ ProximityInfoState mProximityInfoState;
};
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/jni/src/correction_state.h b/native/jni/src/correction_state.h
index 5b2cbd3a2..a63d4aa94 100644
--- a/native/jni/src/correction_state.h
+++ b/native/jni/src/correction_state.h
@@ -79,6 +79,5 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
state->mSkipping = false;
state->mAdditionalProximityMatching = false;
}
-
} // namespace latinime
#endif // LATINIME_CORRECTION_STATE_H
diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h
deleted file mode 100644
index 376ba59d9..000000000
--- a/native/jni/src/debug.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-**
-** Copyright 2011, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
-
-#ifndef LATINIME_DEBUG_H
-#define LATINIME_DEBUG_H
-
-#include "defines.h"
-
-static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
- const unsigned int length) {
- unsigned int i = 0;
- for (; i <= length && input[i] != 0; ++i)
- output[i] = input[i] & 0xFF;
- output[i] = 0;
- return output;
-}
-
-static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
- unsigned char* output, const unsigned int length, unsigned char c) {
- unsigned int i = 0;
- for (; i <= length && input[i] != 0; ++i)
- output[i] = input[i] & 0xFF;
- if (i > 0) output[i-1] = c;
- output[i] = 0;
- return output;
-}
-
-static inline void LOGI_S16(unsigned short* string, const unsigned int length) {
- unsigned char tmp_buffer[length];
- convertToUnibyteString(string, tmp_buffer, length);
- AKLOGI(">> %s", tmp_buffer);
- // The log facility is throwing out log that comes too fast. The following
- // is a dirty way of slowing down processing so that we can see all log.
- // TODO : refactor this in a blocking log or something.
- // usleep(10);
-}
-
-static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int length,
- unsigned char c) {
- unsigned char tmp_buffer[length+1];
- convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c);
- AKLOGI(">> %s", tmp_buffer);
- // Likewise
- // usleep(10);
-}
-
-static inline void printDebug(const char* tag, int* codes, int codesSize, int MAX_PROXIMITY_CHARS) {
- unsigned char *buf = (unsigned char*)malloc((1 + codesSize) * sizeof(*buf));
-
- buf[codesSize] = 0;
- while (--codesSize >= 0)
- buf[codesSize] = (unsigned char)codes[codesSize * MAX_PROXIMITY_CHARS];
- AKLOGI("%s, WORD = %s", tag, buf);
-
- free(buf);
-}
-
-#endif // LATINIME_DEBUG_H
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index cd2fc634a..ad526fb7f 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -1,42 +1,86 @@
/*
-**
-** Copyright 2010, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
+ * Copyright (C) 2010, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef LATINIME_DEFINES_H
#define LATINIME_DEFINES_H
+#include <stdint.h>
+
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
-#include <cutils/log.h>
-#define AKLOGE ALOGE
-#define AKLOGI ALOGI
+#include <android/log.h>
+#ifndef LOG_TAG
+#define LOG_TAG "LatinIME: "
+#endif
+#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
+
+#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
+ dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
+#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
+// TODO: INTS_TO_CHARS
+#define SHORTS_TO_CHARS(input, length, output) do { \
+ shortArrayToCharArray(input, length, output); } while (0)
+
+static inline void dumpWordInfo(const unsigned short *word, const int length,
+ const int rank, const int frequency) {
+ static char charBuf[50];
+ int i = 0;
+ for (; i < length; ++i) {
+ const unsigned short c = word[i];
+ if (c == 0) {
+ break;
+ }
+ // static_cast only for debugging
+ charBuf[i] = static_cast<char>(c);
+ }
+ charBuf[i] = 0;
+ if (i > 1) {
+ AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency);
+ }
+}
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0)
+static inline void dumpResult(
+ const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
+ const int maxWordLength) {
+ AKLOGI("--- DUMP RESULT ---------");
+ for (int i = 0; i < maxWordCounts; ++i) {
+ dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]);
+ }
+ AKLOGI("-------------------------");
+}
-static inline void dumpWord(const unsigned short* word, const int length) {
+static inline void dumpWord(const unsigned short *word, const int length) {
static char charBuf[50];
-
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
+ int i = 0;
+ for (; i < length; ++i) {
+ const unsigned short c = word[i];
+ if (c == 0) {
+ break;
+ }
+ // static_cast only for debugging
+ charBuf[i] = static_cast<char>(c);
+ }
+ charBuf[i] = 0;
+ if (i > 1) {
+ AKLOGI("[ %s ]", charBuf);
}
- charBuf[length] = 0;
- AKLOGI("[ %s ]", charBuf);
}
-static inline void dumpWordInt(const int* word, const int length) {
+static inline void dumpWordInt(const int *word, const int length) {
static char charBuf[50];
for (int i = 0; i < length; ++i) {
@@ -46,11 +90,58 @@ static inline void dumpWordInt(const int* word, const int length) {
AKLOGI("i[ %s ]", charBuf);
}
+// TODO: Change this to intArrayToCharArray
+static inline void shortArrayToCharArray(
+ const unsigned short *input, const int length, char *output) {
+ int i = 0;
+ for (;i < length; ++i) {
+ const unsigned short c = input[i];
+ if (c == 0) {
+ break;
+ }
+ // static_cast only for debugging
+ output[i] = static_cast<char>(c);
+ }
+ output[i] = 0;
+}
+
+#ifndef __ANDROID__
+#include <cassert>
+#include <execinfo.h>
+#include <stdlib.h>
+
+#define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0)
+#define SHOW_STACK_TRACE do { showStackTrace(); } while (0)
+
+static inline void showStackTrace() {
+ void *callstack[128];
+ int i, frames = backtrace(callstack, 128);
+ char **strs = backtrace_symbols(callstack, frames);
+ for (i = 0; i < frames; ++i) {
+ if (i == 0) {
+ AKLOGI("=== Trace ===");
+ continue;
+ }
+ AKLOGI("%s", strs[i]);
+ }
+ free(strs);
+}
+#else
+#include <cassert>
+#define ASSERT(success) assert(success)
+#define SHOW_STACK_TRACE
+#endif
+
#else
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
+#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
#define DUMP_WORD(word, length)
#define DUMP_WORD_INT(word, length)
+#define ASSERT(success)
+#define SHOW_STACK_TRACE
+// TODO: INTS_TO_CHARS
+#define SHORTS_TO_CHARS(input, length, output)
#endif
#ifdef FLAG_DO_PROFILE
@@ -64,14 +155,14 @@ static unsigned int profile_counter[PROF_BUF_SIZE];
#define PROF_RESET prof_reset()
#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]
-#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while(0)
+#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0)
#define PROF_START(prof_buf_id) do { \
- PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while(0)
-#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0)
+ PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0)
+#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0)
#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
#define PROF_CLOCKOUT(prof_buf_id) \
AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
-#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
+#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0)
static inline void prof_reset(void) {
for (int i = 0; i < PROF_BUF_SIZE; ++i) {
@@ -86,17 +177,18 @@ static inline void prof_out(void) {
AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
}
AKLOGI("Total time is %6.3f ms.",
- profile_buf[PROF_BUF_SIZE - 1] * 1000 / (float)CLOCKS_PER_SEC);
+ profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC));
float all = 0;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
all += profile_buf[i];
}
if (all == 0) all = 1;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
- if (profile_buf[i] != 0) {
+ if (profile_buf[i]) {
AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
i, (profile_buf[i] * 100 / all),
- profile_buf[i] * 1000 / (float)CLOCKS_PER_SEC, profile_counter[i]);
+ profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC),
+ profile_counter[i]);
}
}
}
@@ -116,10 +208,6 @@ static inline void prof_out(void) {
#endif // FLAG_DO_PROFILE
#ifdef FLAG_DBG
-#include <cutils/log.h>
-#ifndef LOG_TAG
-#define LOG_TAG "LatinIME: "
-#endif
#define DEBUG_DICT true
#define DEBUG_DICT_FULL false
#define DEBUG_EDIT_DISTANCE false
@@ -132,6 +220,12 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#ifdef FLAG_FULL_DBG
+#define DEBUG_GEO_FULL true
+#else
+#define DEBUG_GEO_FULL false
+#endif
+
#else // FLAG_DBG
#define DEBUG_DICT false
@@ -146,6 +240,7 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#define DEBUG_GEO_FULL false
#endif // FLAG_DBG
@@ -176,15 +271,15 @@ static inline void prof_out(void) {
#define FLAG_BIGRAM_FREQ 0x7F
#define DICTIONARY_VERSION_MIN 200
-#define NOT_VALID_WORD -99
-#define NOT_A_CHARACTER -1
-#define NOT_A_DISTANCE -1
-#define NOT_A_COORDINATE -1
-#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
-#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
-#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4
-#define NOT_AN_INDEX -1
-#define NOT_A_PROBABILITY -1
+#define NOT_VALID_WORD (-99)
+#define NOT_A_CODE_POINT (-1)
+#define NOT_A_DISTANCE (-1)
+#define NOT_A_COORDINATE (-1)
+#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
+#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
+#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
+#define NOT_AN_INDEX (-1)
+#define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' '
@@ -225,9 +320,15 @@ static inline void prof_out(void) {
// This is only used for the size of array. Not to be used in c functions.
#define MAX_WORD_LENGTH_INTERNAL 48
+// This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16.
+#define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16
+
// This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
+// Assuming locale strings such as en_US, sr-Latn etc.
+#define MAX_LOCALE_STRING_LENGTH 10
+
// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
// for better performance.
// Holds up to 1 candidate for each word
@@ -252,12 +353,18 @@ static inline void prof_out(void) {
#define FIRST_WORD_INDEX 0
+#define MAX_SPACES_INTERNAL 16
+
+// Max Distance between point to key
+#define MAX_POINT_TO_KEY_LENGTH 10000000
+
+// The max number of the keys in one keyboard layout
+#define MAX_KEY_COUNT_IN_A_KEYBOARD 64
+
// TODO: Reduce this constant if possible; check the maximum number of digraphs in the same
// word in the dictionary for languages with digraphs, like German and French
#define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5
-// Minimum suggest depth for one word for all cases except for missing space suggestions.
-#define MIN_SUGGEST_DEPTH 1
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
@@ -286,7 +393,26 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f
// DEBUG
-#define INPUTLENGTH_FOR_DEBUG -1
-#define MIN_OUTPUT_INDEX_FOR_DEBUG -1
-
+#define INPUTLENGTH_FOR_DEBUG (-1)
+#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+ TypeName(); \
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
+
+// Used as a return value for character comparison
+typedef enum {
+ // Same char, possibly with different case or accent
+ EQUIVALENT_CHAR,
+ // It is a char located nearby on the keyboard
+ NEAR_PROXIMITY_CHAR,
+ // It is an unrelated char
+ UNRELATED_CHAR,
+ // Additional proximity char which can differ by language.
+ ADDITIONAL_PROXIMITY_CHAR
+} ProximityType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/dic_traverse_wrapper.cpp b/native/jni/src/dic_traverse_wrapper.cpp
new file mode 100644
index 000000000..88ca9fa0d
--- /dev/null
+++ b/native/jni/src/dic_traverse_wrapper.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2012, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LatinIME: jni: Session"
+
+#include "dic_traverse_wrapper.h"
+
+namespace latinime {
+void *(*DicTraverseWrapper::sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring) = 0;
+void (*DicTraverseWrapper::sDicTraverseSessionReleaseMethod)(void *) = 0;
+void (*DicTraverseWrapper::sDicTraverseSessionInitMethod)(
+ void *, const Dictionary *const, const int *, const int) = 0;
+} // namespace latinime
diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h
new file mode 100644
index 000000000..292382487
--- /dev/null
+++ b/native/jni/src/dic_traverse_wrapper.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2012, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_TRAVERSE_WRAPPER_H
+#define LATINIME_DIC_TRAVERSE_WRAPPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+class Dictionary;
+// TODO: Remove
+class DicTraverseWrapper {
+ public:
+ static void *getDicTraverseSession(JNIEnv *env, jstring locale) {
+ if (sDicTraverseSessionFactoryMethod) {
+ return sDicTraverseSessionFactoryMethod(env, locale);
+ }
+ return 0;
+ }
+ static void initDicTraverseSession(void *traverseSession,
+ const Dictionary *const dictionary, const int *prevWord, const int prevWordLength) {
+ if (sDicTraverseSessionInitMethod) {
+ sDicTraverseSessionInitMethod(traverseSession, dictionary, prevWord, prevWordLength);
+ }
+ }
+ static void releaseDicTraverseSession(void *traverseSession) {
+ if (sDicTraverseSessionReleaseMethod) {
+ sDicTraverseSessionReleaseMethod(traverseSession);
+ }
+ }
+ static void setTraverseSessionFactoryMethod(
+ void *(*factoryMethod)(JNIEnv *, jstring)) {
+ sDicTraverseSessionFactoryMethod = factoryMethod;
+ }
+ static void setTraverseSessionInitMethod(
+ void (*initMethod)(void *, const Dictionary *const, const int *, const int)) {
+ sDicTraverseSessionInitMethod = initMethod;
+ }
+ static void setTraverseSessionReleaseMethod(void (*releaseMethod)(void *)) {
+ sDicTraverseSessionReleaseMethod = releaseMethod;
+ }
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseWrapper);
+ static void *(*sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring);
+ static void (*sDicTraverseSessionInitMethod)(
+ void *, const Dictionary *const, const int *, const int);
+ static void (*sDicTraverseSessionReleaseMethod)(void *);
+};
+int register_DicTraverseSession(JNIEnv *env);
+} // namespace latinime
+#endif // LATINIME_DIC_TRAVERSE_WRAPPER_H
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 1fb02478b..2fbe83e86 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -1,36 +1,44 @@
/*
-**
-** Copyright 2009, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
-
-#include <stdio.h>
+ * Copyright (C) 2009, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#define LOG_TAG "LatinIME: dictionary.cpp"
+#include <stdint.h>
+
+#include "bigram_dictionary.h"
#include "binary_format.h"
#include "defines.h"
#include "dictionary.h"
+#include "dic_traverse_wrapper.h"
+#include "gesture_decoder_wrapper.h"
+#include "unigram_dictionary.h"
namespace latinime {
// TODO: Change the type of all keyCodes to uint32_t
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
- int typedLetterMultiplier, int fullWordMultiplier,
- int maxWordLength, int maxWords)
- : mDict((unsigned char*) dict), mDictSize(dictSize),
- mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) {
+ int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords,
+ int maxPredictions)
+ : mDict(static_cast<unsigned char *>(dict)),
+ mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)),
+ mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
+ mUnigramDictionary(new UnigramDictionary(mOffsetDict, typedLetterMultiplier,
+ fullWordMultiplier, maxWordLength, maxWords, BinaryFormat::getFlags(mDict))),
+ mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)),
+ mGestureDecoder(new GestureDecoderWrapper(maxWordLength, maxWords)) {
if (DEBUG_DICT) {
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
AKLOGI("Max word length (%d) is greater than %d",
@@ -38,30 +46,56 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
}
}
- mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
- mWordsPriorityQueuePool = new WordsPriorityQueuePool(
- maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
- const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict);
- const unsigned int options = BinaryFormat::getFlags(mDict);
- mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier,
- fullWordMultiplier, maxWordLength, maxWords, options);
- mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, this);
}
Dictionary::~Dictionary() {
- delete mCorrection;
- delete mWordsPriorityQueuePool;
delete mUnigramDictionary;
delete mBigramDictionary;
+ delete mGestureDecoder;
}
-int Dictionary::getFrequency(const int32_t *word, int length) {
+int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
+ int *codes, int codesSize, int *prevWordChars,
+ int prevWordLength, int commitPoint, bool isGesture,
+ bool useFullEditDistance, unsigned short *outWords,
+ int *frequencies, int *spaceIndices, int *outputTypes) const {
+ int result = 0;
+ if (isGesture) {
+ DicTraverseWrapper::initDicTraverseSession(
+ traverseSession, this, prevWordChars, prevWordLength);
+ result = mGestureDecoder->getSuggestions(proximityInfo, traverseSession,
+ xcoordinates, ycoordinates, times, pointerIds, codes, codesSize, commitPoint,
+ outWords, frequencies, spaceIndices, outputTypes);
+ if (DEBUG_DICT) {
+ DUMP_RESULT(outWords, frequencies, 18 /* MAX_WORDS */, MAX_WORD_LENGTH_INTERNAL);
+ }
+ return result;
+ } else {
+ std::map<int, int> bigramMap;
+ uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
+ mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
+ prevWordLength, &bigramMap, bigramFilter);
+ result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates,
+ ycoordinates, codes, codesSize, &bigramMap, bigramFilter,
+ useFullEditDistance, outWords, frequencies, outputTypes);
+ return result;
+ }
+}
+
+int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
+ unsigned short *outWords, int *frequencies, int *outputTypes) const {
+ if (length <= 0) return 0;
+ return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
+ outputTypes);
+}
+
+int Dictionary::getFrequency(const int32_t *word, int length) const {
return mUnigramDictionary->getFrequency(word, length);
}
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
- int length2) {
+ int length2) const {
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
}
-
} // namespace latinime
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 9f2367904..a1358890d 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -17,55 +17,63 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
-#include <map>
+#include <stdint.h>
-#include "bigram_dictionary.h"
-#include "char_utils.h"
-#include "correction.h"
#include "defines.h"
-#include "proximity_info.h"
-#include "unigram_dictionary.h"
-#include "words_priority_queue_pool.h"
namespace latinime {
+class BigramDictionary;
+class IncrementalDecoderInterface;
+class ProximityInfo;
+class UnigramDictionary;
+
class Dictionary {
public:
+ // Taken from SuggestedWords.java
+ const static int KIND_TYPED = 0; // What user typed
+ const static int KIND_CORRECTION = 1; // Simple correction/suggestion
+ const static int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
+ const static int KIND_WHITELIST = 3; // Whitelisted word
+ const static int KIND_BLACKLIST = 4; // Blacklisted word
+ const static int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
+ const static int KIND_APP_DEFINED = 6; // Suggested by the application
+ const static int KIND_SHORTCUT = 7; // A shortcut
+ const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
+
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
- int fullWordMultiplier, int maxWordLength, int maxWords);
+ int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions);
- int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
- int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength,
- bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
- std::map<int, int> bigramMap;
- uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
- mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
- prevWordLength, &bigramMap, bigramFilter);
- return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
- mCorrection, xcoordinates, ycoordinates, codes, codesSize, &bigramMap,
- bigramFilter, useFullEditDistance, outWords, frequencies);
- }
+ int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
+ int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
+ int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
+ bool useFullEditDistance, unsigned short *outWords,
+ int *frequencies, int *spaceIndices, int *outputTypes) const;
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
- return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
- maxWordLength, maxBigrams);
- }
+ unsigned short *outWords, int *frequencies, int *outputTypes) const;
- int getFrequency(const int32_t *word, int length);
- bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
- void *getDict() { return (void *)mDict; }
- int getDictSize() { return mDictSize; }
- int getMmapFd() { return mMmapFd; }
- int getDictBufAdjust() { return mDictBufAdjust; }
- ~Dictionary();
+ int getFrequency(const int32_t *word, int length) const;
+ bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
+ const uint8_t *getDict() const { // required to release dictionary buffer
+ return mDict;
+ }
+ const uint8_t *getOffsetDict() const {
+ return mOffsetDict;
+ }
+ int getDictSize() const { return mDictSize; }
+ int getMmapFd() const { return mMmapFd; }
+ int getDictBufAdjust() const { return mDictBufAdjust; }
+ virtual ~Dictionary();
// public static utility methods
// static inline methods should be defined in the header file
static int wideStrLen(unsigned short *str);
private:
- const unsigned char *mDict;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ const uint8_t *mDict;
+ const uint8_t *mOffsetDict;
// Used only for the mmap version of dictionary loading, but we use these as dummy variables
// also for the malloc version.
@@ -73,21 +81,21 @@ class Dictionary {
const int mMmapFd;
const int mDictBufAdjust;
- UnigramDictionary *mUnigramDictionary;
- BigramDictionary *mBigramDictionary;
- WordsPriorityQueuePool *mWordsPriorityQueuePool;
- Correction *mCorrection;
+ const UnigramDictionary *mUnigramDictionary;
+ const BigramDictionary *mBigramDictionary;
+ IncrementalDecoderInterface *mGestureDecoder;
};
// public static utility methods
// static inline methods should be defined in the header file
inline int Dictionary::wideStrLen(unsigned short *str) {
if (!str) return 0;
- unsigned short *end = str;
- while (*end)
- end++;
- return end - str;
+ int length = 0;
+ while (*str) {
+ str++;
+ length++;
+ }
+ return length;
}
} // namespace latinime
-
#endif // LATINIME_DICTIONARY_H
diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h
new file mode 100644
index 000000000..3892b46a0
--- /dev/null
+++ b/native/jni/src/geometry_utils.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_GEOMETRY_UTILS_H
+#define LATINIME_GEOMETRY_UTILS_H
+
+#include <cmath>
+
+#define MAX_PATHS 2
+
+#define DEBUG_DECODER false
+
+#define M_PI_F 3.14159265f
+
+#define ROUND_FLOAT_10000(f) ((f) < 1000.0f && (f) > 0.001f) \
+ ? (floorf((f) * 10000.0f) / 10000.0f) : (f)
+
+#define SQUARE_FLOAT(x) ((x) * (x))
+
+namespace latinime {
+
+static inline float getSquaredDistanceFloat(float x1, float y1, float x2, float y2) {
+ const float deltaX = x1 - x2;
+ const float deltaY = y1 - y2;
+ return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY);
+}
+
+static inline float getDistanceFloat(float x1, float y1, float x2, float y2) {
+ return hypotf(x1 - x2, y1 - y2);
+}
+
+static inline int getDistanceInt(int x1, int y1, int x2, int y2) {
+ return static_cast<int>(getDistanceFloat(static_cast<float>(x1), static_cast<float>(y1),
+ static_cast<float>(x2), static_cast<float>(y2)));
+}
+
+static inline float getAngle(int x1, int y1, int x2, int y2) {
+ const int dx = x1 - x2;
+ const int dy = y1 - y2;
+ if (dx == 0 && dy == 0) return 0;
+ return atan2f(static_cast<float>(dy), static_cast<float>(dx));
+}
+
+static inline float getAngleDiff(float a1, float a2) {
+ const float deltaA = fabsf(a1 - a2);
+ const float diff = ROUND_FLOAT_10000(deltaA);
+ if (diff > M_PI_F) {
+ const float normalizedDiff = 2.0f * M_PI_F - diff;
+ return ROUND_FLOAT_10000(normalizedDiff);
+ }
+ return diff;
+}
+
+static inline float pointToLineSegSquaredDistanceFloat(
+ float x, float y, float x1, float y1, float x2, float y2, bool extend) {
+ const float ray1x = x - x1;
+ const float ray1y = y - y1;
+ const float ray2x = x2 - x1;
+ const float ray2y = y2 - y1;
+
+ const float dotProduct = ray1x * ray2x + ray1y * ray2y;
+ const float lineLengthSqr = SQUARE_FLOAT(ray2x) + SQUARE_FLOAT(ray2y);
+ const float projectionLengthSqr = dotProduct / lineLengthSqr;
+
+ float projectionX;
+ float projectionY;
+ if (!extend && projectionLengthSqr < 0.0f) {
+ projectionX = x1;
+ projectionY = y1;
+ } else if (!extend && projectionLengthSqr > 1.0f) {
+ projectionX = x2;
+ projectionY = y2;
+ } else {
+ projectionX = x1 + projectionLengthSqr * ray2x;
+ projectionY = y1 + projectionLengthSqr * ray2y;
+ }
+ return getSquaredDistanceFloat(x, y, projectionX, projectionY);
+}
+} // namespace latinime
+#endif // LATINIME_GEOMETRY_UTILS_H
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.cpp b/native/jni/src/gesture/gesture_decoder_wrapper.cpp
new file mode 100644
index 000000000..afbe0c5c3
--- /dev/null
+++ b/native/jni/src/gesture/gesture_decoder_wrapper.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gesture_decoder_wrapper.h"
+
+namespace latinime {
+ IncrementalDecoderInterface *
+ (*GestureDecoderWrapper::sGestureDecoderFactoryMethod)(int, int) = 0;
+} // namespace latinime
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h
new file mode 100644
index 000000000..92e1ded49
--- /dev/null
+++ b/native/jni/src/gesture/gesture_decoder_wrapper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_GESTURE_DECODER_WRAPPER_H
+#define LATINIME_GESTURE_DECODER_WRAPPER_H
+
+#include <stdint.h>
+#include "defines.h"
+#include "incremental_decoder_interface.h"
+
+namespace latinime {
+
+class UnigramDictionary;
+class BigramDictionary;
+class ProximityInfo;
+
+class GestureDecoderWrapper : public IncrementalDecoderInterface {
+ public:
+ GestureDecoderWrapper(const int maxWordLength, const int maxWords)
+ : mIncrementalDecoderInterface(getGestureDecoderInstance(maxWordLength, maxWords)) {
+ }
+
+ virtual ~GestureDecoderWrapper() {
+ delete mIncrementalDecoderInterface;
+ }
+
+ int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
+ unsigned short *outWords, int *frequencies, int *outputIndices,
+ int *outputTypes) const {
+ if (!mIncrementalDecoderInterface) {
+ return 0;
+ }
+ return mIncrementalDecoderInterface->getSuggestions(
+ pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
+ inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
+ }
+
+ static void setGestureDecoderFactoryMethod(
+ IncrementalDecoderInterface *(*factoryMethod)(int, int)) {
+ sGestureDecoderFactoryMethod = factoryMethod;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(GestureDecoderWrapper);
+ static IncrementalDecoderInterface *getGestureDecoderInstance(int maxWordLength, int maxWords) {
+ if (sGestureDecoderFactoryMethod) {
+ return sGestureDecoderFactoryMethod(maxWordLength, maxWords);
+ }
+ return 0;
+ }
+
+ static IncrementalDecoderInterface *(*sGestureDecoderFactoryMethod)(int, int);
+ IncrementalDecoderInterface *mIncrementalDecoderInterface;
+};
+} // namespace latinime
+#endif // LATINIME_GESTURE_DECODER_WRAPPER_H
diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h
new file mode 100644
index 000000000..d1395aab9
--- /dev/null
+++ b/native/jni/src/gesture/incremental_decoder_interface.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+#define LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+
+#include <stdint.h>
+#include "defines.h"
+
+namespace latinime {
+
+class UnigramDictionary;
+class BigramDictionary;
+class ProximityInfo;
+
+class IncrementalDecoderInterface {
+ public:
+ virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
+ int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
+ int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
+ int *outputIndices, int *outputTypes) const = 0;
+ IncrementalDecoderInterface() { };
+ virtual ~IncrementalDecoderInterface() { };
+ private:
+ DISALLOW_COPY_AND_ASSIGN(IncrementalDecoderInterface);
+};
+} // namespace latinime
+#endif // LATINIME_INCREMENTAL_DECODER_INTERFACE_H
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.cpp b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
new file mode 100644
index 000000000..8fcda6c9e
--- /dev/null
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "incremental_decoder_wrapper.h"
+
+namespace latinime {
+ IncrementalDecoderInterface *
+ (*IncrementalDecoderWrapper::sIncrementalDecoderFactoryMethod)(int, int) = 0;
+} // namespace latinime
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h
new file mode 100644
index 000000000..da7afdb8a
--- /dev/null
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_INCREMENTAL_DECODER_WRAPPER_H
+#define LATINIME_INCREMENTAL_DECODER_WRAPPER_H
+
+#include <stdint.h>
+#include "defines.h"
+#include "incremental_decoder_interface.h"
+
+namespace latinime {
+
+class UnigramDictionary;
+class BigramDictionary;
+class ProximityInfo;
+
+class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
+ public:
+ IncrementalDecoderWrapper(const int maxWordLength, const int maxWords)
+ : mIncrementalDecoderInterface(getIncrementalDecoderInstance(maxWordLength, maxWords)) {
+ }
+
+ virtual ~IncrementalDecoderWrapper() {
+ delete mIncrementalDecoderInterface;
+ }
+
+ int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
+ unsigned short *outWords, int *frequencies, int *outputIndices,
+ int *outputTypes) const {
+ if (!mIncrementalDecoderInterface) {
+ return 0;
+ }
+ return mIncrementalDecoderInterface->getSuggestions(
+ pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
+ inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
+ }
+
+ static void setIncrementalDecoderFactoryMethod(
+ IncrementalDecoderInterface *(*factoryMethod)(int, int)) {
+ sIncrementalDecoderFactoryMethod = factoryMethod;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(IncrementalDecoderWrapper);
+ static IncrementalDecoderInterface *getIncrementalDecoderInstance(int maxWordLength,
+ int maxWords) {
+ if (sIncrementalDecoderFactoryMethod) {
+ return sIncrementalDecoderFactoryMethod(maxWordLength, maxWords);
+ }
+ return 0;
+ }
+
+ static IncrementalDecoderInterface *(*sIncrementalDecoderFactoryMethod)(int, int);
+ IncrementalDecoderInterface *mIncrementalDecoderInterface;
+};
+} // namespace latinime
+#endif // LATINIME_INCREMENTAL_DECODER_WRAPPER_H
diff --git a/native/jni/src/hash_map_compat.h b/native/jni/src/hash_map_compat.h
new file mode 100644
index 000000000..116359a73
--- /dev/null
+++ b/native/jni/src/hash_map_compat.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2012, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HASH_MAP_COMPAT_H
+#define LATINIME_HASH_MAP_COMPAT_H
+
+// TODO: Use std::unordered_map that has been standardized in C++11
+
+#ifdef __APPLE__
+#include <ext/hash_map>
+#else // __APPLE__
+#include <hash_map>
+#endif // __APPLE__
+
+#ifdef __SGI_STL_PORT
+#define hash_map_compat stlport::hash_map
+#else // __SGI_STL_PORT
+#define hash_map_compat __gnu_cxx::hash_map
+#endif // __SGI_STL_PORT
+
+#endif // LATINIME_HASH_MAP_COMPAT_H
diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp
index 80e14fbb1..c9f83b62c 100644
--- a/native/jni/src/proximity_info.cpp
+++ b/native/jni/src/proximity_info.cpp
@@ -14,93 +14,86 @@
* limitations under the License.
*/
-#include <assert.h>
-#include <stdio.h>
-#include <string>
+#include <cassert>
+#include <cmath>
+#include <cstring>
#define LOG_TAG "LatinIME: proximity_info.cpp"
#include "additional_proximity_chars.h"
+#include "char_utils.h"
#include "defines.h"
-#include "dictionary.h"
+#include "geometry_utils.h"
+#include "jni.h"
#include "proximity_info.h"
namespace latinime {
-inline void copyOrFillZero(void *to, const void *from, size_t size) {
- if (from) {
- memcpy(to, from, size);
- } else {
- memset(to, 0, size);
+/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
+
+static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
+ jint *buffer) {
+ if (jArray && buffer) {
+ env->GetIntArrayRegion(jArray, 0, len, buffer);
+ } else if (buffer) {
+ memset(buffer, 0, len * sizeof(jint));
}
}
-const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
+static inline void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray, jsize len,
+ jfloat *buffer) {
+ if (jArray && buffer) {
+ env->GetFloatArrayRegion(jArray, 0, len, buffer);
+ } else if (buffer) {
+ memset(buffer, 0, len * sizeof(jfloat));
+ }
+}
-ProximityInfo::ProximityInfo(const std::string localeStr, const int maxProximityCharsSize,
+ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int maxProximityCharsSize,
const int keyboardWidth, const int keyboardHeight, const int gridWidth,
- const int gridHeight, const int mostCommonKeyWidth,
- const int32_t *proximityCharsArray, const int keyCount, const int32_t *keyXCoordinates,
- const int32_t *keyYCoordinates, const int32_t *keyWidths, const int32_t *keyHeights,
- const int32_t *keyCharCodes, const float *sweetSpotCenterXs, const float *sweetSpotCenterYs,
- const float *sweetSpotRadii)
- : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize),
- GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight),
+ const int gridHeight, const int mostCommonKeyWidth, const jintArray proximityChars,
+ const int keyCount, const jintArray keyXCoordinates, const jintArray keyYCoordinates,
+ const jintArray keyWidths, const jintArray keyHeights, const jintArray keyCharCodes,
+ const jfloatArray sweetSpotCenterXs, const jfloatArray sweetSpotCenterYs,
+ const jfloatArray sweetSpotRadii)
+ : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), GRID_WIDTH(gridWidth),
+ GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth),
CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight),
KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
+ KEYBOARD_WIDTH(keyboardWidth), KEYBOARD_HEIGHT(keyboardHeight),
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
&& keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
&& sweetSpotCenterYs && sweetSpotRadii),
- mLocaleStr(localeStr),
- mInputXCoordinates(0), mInputYCoordinates(0),
- mTouchPositionCorrectionEnabled(false) {
+ mProximityCharsArray(new int32_t[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
+ /* proximityGridLength */]),
+ mCodeToKeyMap() {
const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
- mProximityCharsArray = new int32_t[proximityGridLength];
- mInputCodes = new int32_t[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH_INTERNAL];
if (DEBUG_PROXIMITY_INFO) {
AKLOGI("Create proximity info array %d", proximityGridLength);
}
- memcpy(mProximityCharsArray, proximityCharsArray,
- proximityGridLength * sizeof(mProximityCharsArray[0]));
- const int normalizedSquaredDistancesLength =
- MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH_INTERNAL;
- mNormalizedSquaredDistances = new int[normalizedSquaredDistancesLength];
- for (int i = 0; i < normalizedSquaredDistancesLength; ++i) {
- mNormalizedSquaredDistances[i] = NOT_A_DISTANCE;
- }
-
- copyOrFillZero(mKeyXCoordinates, keyXCoordinates, KEY_COUNT * sizeof(mKeyXCoordinates[0]));
- copyOrFillZero(mKeyYCoordinates, keyYCoordinates, KEY_COUNT * sizeof(mKeyYCoordinates[0]));
- copyOrFillZero(mKeyWidths, keyWidths, KEY_COUNT * sizeof(mKeyWidths[0]));
- copyOrFillZero(mKeyHeights, keyHeights, KEY_COUNT * sizeof(mKeyHeights[0]));
- copyOrFillZero(mKeyCharCodes, keyCharCodes, KEY_COUNT * sizeof(mKeyCharCodes[0]));
- copyOrFillZero(mSweetSpotCenterXs, sweetSpotCenterXs,
- KEY_COUNT * sizeof(mSweetSpotCenterXs[0]));
- copyOrFillZero(mSweetSpotCenterYs, sweetSpotCenterYs,
- KEY_COUNT * sizeof(mSweetSpotCenterYs[0]));
- copyOrFillZero(mSweetSpotRadii, sweetSpotRadii, KEY_COUNT * sizeof(mSweetSpotRadii[0]));
-
- initializeCodeToKeyIndex();
-}
-
-// Build the reversed look up table from the char code to the index in mKeyXCoordinates,
-// mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCharCodes.
-void ProximityInfo::initializeCodeToKeyIndex() {
- memset(mCodeToKeyIndex, -1, (MAX_CHAR_CODE + 1) * sizeof(mCodeToKeyIndex[0]));
- for (int i = 0; i < KEY_COUNT; ++i) {
- const int code = mKeyCharCodes[i];
- if (0 <= code && code <= MAX_CHAR_CODE) {
- mCodeToKeyIndex[code] = i;
- }
+ const jsize localeCStrUtf8Length = env->GetStringUTFLength(localeJStr);
+ if (localeCStrUtf8Length >= MAX_LOCALE_STRING_LENGTH) {
+ AKLOGI("Locale string length too long: length=%d", localeCStrUtf8Length);
+ assert(false);
}
+ memset(mLocaleStr, 0, sizeof(mLocaleStr));
+ env->GetStringUTFRegion(localeJStr, 0, env->GetStringLength(localeJStr), mLocaleStr);
+ safeGetOrFillZeroIntArrayRegion(env, proximityChars, proximityGridLength, mProximityCharsArray);
+ safeGetOrFillZeroIntArrayRegion(env, keyXCoordinates, KEY_COUNT, mKeyXCoordinates);
+ safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
+ safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
+ safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights);
+ safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints);
+ safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
+ safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
+ safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
+ initializeG();
}
ProximityInfo::~ProximityInfo() {
- delete[] mNormalizedSquaredDistances;
delete[] mProximityCharsArray;
- delete[] mInputCodes;
}
inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const {
@@ -112,7 +105,8 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) {
if (DEBUG_DICT) {
AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
- assert(false);
+ // TODO: Enable this assertion.
+ //assert(false);
}
return false;
}
@@ -121,24 +115,33 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (DEBUG_PROXIMITY_INFO) {
AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
+ int32_t *proximityCharsArray = mProximityCharsArray;
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]);
}
- if (mProximityCharsArray[startIndex + i] == KEYCODE_SPACE) {
+ if (proximityCharsArray[startIndex + i] == KEYCODE_SPACE) {
return true;
}
}
return false;
}
-bool ProximityInfo::isOnKey(const int keyId, const int x, const int y) const {
- if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case
- const int left = mKeyXCoordinates[keyId];
- const int top = mKeyYCoordinates[keyId];
- const int right = left + mKeyWidths[keyId] + 1;
- const int bottom = top + mKeyHeights[keyId];
- return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom;
+static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float x2, float y2,
+ float scale) {
+ const float deltaX = x1 - x2;
+ const float deltaY = y1 - y2;
+ return (SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY)) / SQUARE_FLOAT(scale);
+}
+
+float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat(
+ const int keyId, const int x, const int y) const {
+ const float centerX = static_cast<float>(getKeyCenterXOfKeyIdG(keyId));
+ const float centerY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId));
+ const float touchX = static_cast<float>(x);
+ const float touchY = static_cast<float>(y);
+ const float keyWidth = static_cast<float>(getMostCommonKeyWidth());
+ return getNormalizedSquaredDistanceFloat(centerX, centerY, touchX, touchY, keyWidth);
}
int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int y) const {
@@ -156,16 +159,17 @@ int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int
void ProximityInfo::calculateNearbyKeyCodes(
const int x, const int y, const int32_t primaryKey, int *inputCodes) const {
+ int32_t *proximityCharsArray = mProximityCharsArray;
int insertPos = 0;
inputCodes[insertPos++] = primaryKey;
const int startIndex = getStartIndexFromCoordinates(x, y);
if (startIndex >= 0) {
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
- const int32_t c = mProximityCharsArray[startIndex + i];
+ const int32_t c = proximityCharsArray[startIndex + i];
if (c < KEYCODE_SPACE || c == primaryKey) {
continue;
}
- const int keyIndex = getKeyIndex(c);
+ const int keyIndex = getKeyIndexOf(c);
const bool onKey = isOnKey(keyIndex, x, y);
const int distance = squaredDistanceToEdge(keyIndex, x, y);
if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) {
@@ -179,7 +183,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
}
}
const int additionalProximitySize =
- AdditionalProximityChars::getAdditionalCharsSize(&mLocaleStr, primaryKey);
+ AdditionalProximityChars::getAdditionalCharsSize(mLocaleStr, primaryKey);
if (additionalProximitySize > 0) {
inputCodes[insertPos++] = ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE;
if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
@@ -189,13 +193,13 @@ void ProximityInfo::calculateNearbyKeyCodes(
return;
}
- const int32_t* additionalProximityChars =
- AdditionalProximityChars::getAdditionalChars(&mLocaleStr, primaryKey);
+ const int32_t *additionalProximityChars =
+ AdditionalProximityChars::getAdditionalChars(mLocaleStr, primaryKey);
for (int j = 0; j < additionalProximitySize; ++j) {
const int32_t ac = additionalProximityChars[j];
int k = 0;
for (; k < insertPos; ++k) {
- if ((int)ac == inputCodes[k]) {
+ if (static_cast<int>(ac) == inputCodes[k]) {
break;
}
}
@@ -214,256 +218,78 @@ void ProximityInfo::calculateNearbyKeyCodes(
}
// Add a delimiter for the proximity characters
for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
- inputCodes[i] = NOT_A_CODE;
- }
-}
-
-void ProximityInfo::setInputParams(const int32_t* inputCodes, const int inputLength,
- const int* xCoordinates, const int* yCoordinates) {
- memset(mInputCodes, 0,
- MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE * sizeof(mInputCodes[0]));
-
- for (int i = 0; i < inputLength; ++i) {
- const int32_t primaryKey = inputCodes[i];
- const int x = xCoordinates[i];
- const int y = yCoordinates[i];
- int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE];
- calculateNearbyKeyCodes(x, y, primaryKey, proximities);
- }
-
- if (DEBUG_PROXIMITY_CHARS) {
- for (int i = 0; i < inputLength; ++i) {
- AKLOGI("---");
- for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE; ++j) {
- int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j];
- int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j];
- icc+= 0;
- icfjc += 0;
- AKLOGI("--- (%d)%c,%c", i, icc, icfjc);
- AKLOGI("--- A<%d>,B<%d>", icc, icfjc);
- }
- }
- }
- //Keep for debug, sorry
- //for (int i = 0; i < MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE; ++i) {
- //if (i < inputLength * MAX_PROXIMITY_CHARS_SIZE) {
- //mInputCodes[i] = mInputCodesFromJava[i];
- //} else {
- // mInputCodes[i] = 0;
- // }
- //}
- mInputXCoordinates = xCoordinates;
- mInputYCoordinates = yCoordinates;
- mTouchPositionCorrectionEnabled =
- HAS_TOUCH_POSITION_CORRECTION_DATA && xCoordinates && yCoordinates;
- mInputLength = inputLength;
- for (int i = 0; i < inputLength; ++i) {
- mPrimaryInputWord[i] = getPrimaryCharAt(i);
- }
- mPrimaryInputWord[inputLength] = 0;
- if (DEBUG_PROXIMITY_CHARS) {
- AKLOGI("--- setInputParams");
- }
- for (int i = 0; i < mInputLength; ++i) {
- const int *proximityChars = getProximityCharsAt(i);
- const int primaryKey = proximityChars[0];
- const int x = xCoordinates[i];
- const int y = yCoordinates[i];
- if (DEBUG_PROXIMITY_CHARS) {
- int a = x + y + primaryKey;
- a += 0;
- AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
- // Keep debug code just in case
- //int proximities[50];
- //for (int m = 0; m < 50; ++m) {
- //proximities[m] = 0;
- //}
- //calculateNearbyKeyCodes(x, y, primaryKey, proximities);
- //for (int l = 0; l < 50 && proximities[l] > 0; ++l) {
- //if (DEBUG_PROXIMITY_CHARS) {
- //AKLOGI("--- native Proximity (%d) = %c", l, proximities[l]);
- //}
- //}
- }
- for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityChars[j] > 0; ++j) {
- const int currentChar = proximityChars[j];
- const float squaredDistance = hasInputCoordinates()
- ? calculateNormalizedSquaredDistance(getKeyIndex(currentChar), i)
- : NOT_A_DISTANCE_FLOAT;
- if (squaredDistance >= 0.0f) {
- mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
- (int)(squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
- } else {
- mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = (j == 0)
- ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO
- : PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
- }
- if (DEBUG_PROXIMITY_CHARS) {
- AKLOGI("--- Proximity (%d) = %c", j, currentChar);
- }
- }
- }
-}
-
-inline float square(const float x) { return x * x; }
-
-float ProximityInfo::calculateNormalizedSquaredDistance(
- const int keyIndex, const int inputIndex) const {
- if (keyIndex == NOT_AN_INDEX) {
- return NOT_A_DISTANCE_FLOAT;
- }
- if (!hasSweetSpotData(keyIndex)) {
- return NOT_A_DISTANCE_FLOAT;
- }
- if (NOT_A_COORDINATE == mInputXCoordinates[inputIndex]) {
- return NOT_A_DISTANCE_FLOAT;
+ inputCodes[i] = NOT_A_CODE_POINT;
}
- const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(keyIndex, inputIndex);
- const float squaredRadius = square(mSweetSpotRadii[keyIndex]);
- return squaredDistance / squaredRadius;
-}
-
-bool ProximityInfo::hasInputCoordinates() const {
- return mInputXCoordinates && mInputYCoordinates;
}
-int ProximityInfo::getKeyIndex(const int c) const {
+int ProximityInfo::getKeyIndexOf(const int c) const {
if (KEY_COUNT == 0) {
// We do not have the coordinate data
return NOT_AN_INDEX;
}
- const unsigned short baseLowerC = toBaseLowerCase(c);
- if (baseLowerC > MAX_CHAR_CODE) {
- return NOT_AN_INDEX;
+ const int baseLowerC = static_cast<int>(toBaseLowerCase(c));
+ hash_map_compat<int, int>::const_iterator mapPos = mCodeToKeyMap.find(baseLowerC);
+ if (mapPos != mCodeToKeyMap.end()) {
+ return mapPos->second;
}
- return mCodeToKeyIndex[baseLowerC];
-}
-
-float ProximityInfo::calculateSquaredDistanceFromSweetSpotCenter(
- const int keyIndex, const int inputIndex) const {
- const float sweetSpotCenterX = mSweetSpotCenterXs[keyIndex];
- const float sweetSpotCenterY = mSweetSpotCenterYs[keyIndex];
- const float inputX = (float)mInputXCoordinates[inputIndex];
- const float inputY = (float)mInputYCoordinates[inputIndex];
- return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY);
+ return NOT_AN_INDEX;
}
-inline const int* ProximityInfo::getProximityCharsAt(const int index) const {
- return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);
-}
-
-unsigned short ProximityInfo::getPrimaryCharAt(const int index) const {
- return getProximityCharsAt(index)[0];
+int ProximityInfo::getCodePointOf(const int keyIndex) const {
+ if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
+ return NOT_A_CODE_POINT;
+ }
+ return mKeyIndexToCodePointG[keyIndex];
}
-inline bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
- const int *chars = getProximityCharsAt(index);
- int i = 0;
- while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE) {
- if (chars[i++] == c) {
- return true;
+void ProximityInfo::initializeG() {
+ // TODO: Optimize
+ for (int i = 0; i < KEY_COUNT; ++i) {
+ const int code = mKeyCodePoints[i];
+ const int lowerCode = toBaseLowerCase(code);
+ mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
+ mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
+ mCodeToKeyMap[lowerCode] = i;
+ mKeyIndexToCodePointG[i] = lowerCode;
+ }
+ for (int i = 0; i < KEY_COUNT; i++) {
+ mKeyKeyDistancesG[i][i] = 0;
+ for (int j = i + 1; j < KEY_COUNT; j++) {
+ mKeyKeyDistancesG[i][j] = getDistanceInt(
+ mCenterXsG[i], mCenterYsG[i], mCenterXsG[j], mCenterYsG[j]);
+ mKeyKeyDistancesG[j][i] = mKeyKeyDistancesG[i][j];
}
}
- return false;
}
-bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
- if (index < 0 || index >= mInputLength) return false;
- const int currentChar = getPrimaryCharAt(index);
- const int leftIndex = index - 1;
- if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
- return true;
- }
- const int rightIndex = index + 1;
- if (rightIndex < mInputLength && existsCharInProximityAt(rightIndex, currentChar)) {
- return true;
- }
- return false;
+int ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const {
+ return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode));
}
-// In the following function, c is the current character of the dictionary word
-// currently examined.
-// currentChars is an array containing the keys close to the character the
-// user actually typed at the same position. We want to see if c is in it: if so,
-// then the word contains at that position a character close to what the user
-// typed.
-// What the user typed is actually the first character of the array.
-// proximityIndex is a pointer to the variable where getMatchedProximityId returns
-// the index of c in the proximity chars of the input index.
-// Notice : accented characters do not have a proximity list, so they are alone
-// in their list. The non-accented version of the character should be considered
-// "close", but not the other keys close to the non-accented version.
-ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int index,
- const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
- const int *currentChars = getProximityCharsAt(index);
- const int firstChar = currentChars[0];
- const unsigned short baseLowerC = toBaseLowerCase(c);
+int ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const {
+ return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode));
+}
- // The first char in the array is what user typed. If it matches right away,
- // that means the user typed that same char for this pos.
- if (firstChar == baseLowerC || firstChar == c) {
- return EQUIVALENT_CHAR;
+int ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const {
+ if (keyId >= 0) {
+ return mCenterXsG[keyId];
}
+ return 0;
+}
- if (!checkProximityChars) return UNRELATED_CHAR;
-
- // If the non-accented, lowercased version of that first character matches c,
- // then we have a non-accented version of the accented character the user
- // typed. Treat it as a close char.
- if (toBaseLowerCase(firstChar) == baseLowerC)
- return NEAR_PROXIMITY_CHAR;
-
- // Not an exact nor an accent-alike match: search the list of close keys
- int j = 1;
- while (j < MAX_PROXIMITY_CHARS_SIZE
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return NEAR_PROXIMITY_CHAR;
- }
- ++j;
+int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
+ if (keyId >= 0) {
+ return mCenterYsG[keyId];
}
- if (j < MAX_PROXIMITY_CHARS_SIZE
- && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- ++j;
- while (j < MAX_PROXIMITY_CHARS_SIZE
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return ADDITIONAL_PROXIMITY_CHAR;
- }
- ++j;
- }
- }
-
- // Was not included, signal this as an unrelated character.
- return UNRELATED_CHAR;
+ return 0;
}
-bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
- if (length != mInputLength) {
- return false;
+int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
+ const int keyId0 = getKeyIndexOf(key0);
+ const int keyId1 = getKeyIndexOf(key1);
+ if (keyId0 >= 0 && keyId1 >= 0) {
+ return mKeyKeyDistancesG[keyId0][keyId1];
}
- const int *inputCodes = mInputCodes;
- while (length--) {
- if ((unsigned int) *inputCodes != (unsigned int) *word) {
- return false;
- }
- inputCodes += MAX_PROXIMITY_CHARS_SIZE;
- word++;
- }
- return true;
+ return MAX_POINT_TO_KEY_LENGTH;
}
-
-const int ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
-const int ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
-const int ProximityInfo::MAX_KEY_COUNT_IN_A_KEYBOARD;
-const int ProximityInfo::MAX_CHAR_CODE;
-
} // namespace latinime
diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h
index 3a2511cf1..0d8c6a3ca 100644
--- a/native/jni/src/proximity_info.h
+++ b/native/jni/src/proximity_info.h
@@ -18,113 +18,152 @@
#define LATINIME_PROXIMITY_INFO_H
#include <stdint.h>
-#include <string>
#include "defines.h"
+#include "hash_map_compat.h"
+#include "jni.h"
namespace latinime {
class Correction;
+inline bool isSkippableChar(const uint16_t character) {
+ // TODO: Do not hardcode here
+ return character == '\'' || character == '-';
+}
+
class ProximityInfo {
public:
- static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
- static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
- 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
-
- // Used as a return value for character comparison
- typedef enum {
- // Same char, possibly with different case or accent
- EQUIVALENT_CHAR,
- // It is a char located nearby on the keyboard
- NEAR_PROXIMITY_CHAR,
- // It is an unrelated char
- UNRELATED_CHAR,
- // Additional proximity char which can differ by language.
- ADDITIONAL_PROXIMITY_CHAR
- } ProximityType;
-
- ProximityInfo(const std::string localeStr, const int maxProximityCharsSize,
+ ProximityInfo(JNIEnv *env, const jstring localeJStr, const int maxProximityCharsSize,
const int keyboardWidth, const int keyboardHeight, const int gridWidth,
- const int gridHeight, const int mostCommonkeyWidth,
- const int32_t *proximityCharsArray, const int keyCount, const int32_t *keyXCoordinates,
- const int32_t *keyYCoordinates, const int32_t *keyWidths, const int32_t *keyHeights,
- const int32_t *keyCharCodes, const float *sweetSpotCenterXs,
- const float *sweetSpotCenterYs, const float *sweetSpotRadii);
+ const int gridHeight, const int mostCommonKeyWidth, const jintArray proximityChars,
+ const int keyCount, const jintArray keyXCoordinates, const jintArray keyYCoordinates,
+ const jintArray keyWidths, const jintArray keyHeights, const jintArray keyCharCodes,
+ const jfloatArray sweetSpotCenterXs, const jfloatArray sweetSpotCenterYs,
+ const jfloatArray sweetSpotRadii);
~ProximityInfo();
bool hasSpaceProximity(const int x, const int y) const;
- void setInputParams(const int32_t *inputCodes, const int inputLength,
- const int *xCoordinates, const int *yCoordinates);
- const int* getProximityCharsAt(const int index) const;
- unsigned short getPrimaryCharAt(const int index) const;
- bool existsCharInProximityAt(const int index, const int c) const;
- bool existsAdjacentProximityChars(const int index) const;
- ProximityType getMatchedProximityId(const int index, const unsigned short c,
- const bool checkProximityChars, int *proximityIndex = 0) const;
- int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const {
- return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
- }
+ int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const;
+ float getNormalizedSquaredDistanceFromCenterFloat(
+ const int keyId, const int x, const int y) const;
bool sameAsTyped(const unsigned short *word, int length) const;
- const unsigned short* getPrimaryInputWord() const {
- return mPrimaryInputWord;
+ int getKeyIndexOf(const int c) const;
+ int getCodePointOf(const int keyIndex) const;
+ bool hasSweetSpotData(const int keyIndex) const {
+ // When there are no calibration data for a key,
+ // the radius of the key is assigned to zero.
+ return mSweetSpotRadii[keyIndex] > 0.0f;
+ }
+ float getSweetSpotRadiiAt(int keyIndex) const {
+ return mSweetSpotRadii[keyIndex];
+ }
+ float getSweetSpotCenterXAt(int keyIndex) const {
+ return mSweetSpotCenterXs[keyIndex];
+ }
+ float getSweetSpotCenterYAt(int keyIndex) const {
+ return mSweetSpotCenterYs[keyIndex];
+ }
+ void calculateNearbyKeyCodes(
+ const int x, const int y, const int32_t primaryKey, int *inputCodes) const;
+
+ bool hasTouchPositionCorrectionData() const {
+ return HAS_TOUCH_POSITION_CORRECTION_DATA;
+ }
+
+ int getMostCommonKeyWidth() const {
+ return MOST_COMMON_KEY_WIDTH;
+ }
+
+ int getMostCommonKeyWidthSquare() const {
+ return MOST_COMMON_KEY_WIDTH_SQUARE;
+ }
+
+ const char *getLocaleStr() const {
+ return mLocaleStr;
+ }
+
+ int getKeyCount() const {
+ return KEY_COUNT;
+ }
+
+ int getCellHeight() const {
+ return CELL_HEIGHT;
+ }
+
+ int getCellWidth() const {
+ return CELL_WIDTH;
}
- bool touchPositionCorrectionEnabled() const {
- return mTouchPositionCorrectionEnabled;
+
+ int getGridWidth() const {
+ return GRID_WIDTH;
+ }
+
+ int getGridHeight() const {
+ return GRID_HEIGHT;
}
+ int getKeyboardWidth() const {
+ return KEYBOARD_WIDTH;
+ }
+
+ int getKeyboardHeight() const {
+ return KEYBOARD_HEIGHT;
+ }
+
+ int getKeyCenterXOfCodePointG(int charCode) const;
+ int getKeyCenterYOfCodePointG(int charCode) const;
+ int getKeyCenterXOfKeyIdG(int keyId) const;
+ int getKeyCenterYOfKeyIdG(int keyId) const;
+ int getKeyKeyDistanceG(int key0, int key1) const;
+
private:
- // The max number of the keys in one keyboard layout
- static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
- // The upper limit of the char code in mCodeToKeyIndex
- static const int MAX_CHAR_CODE = 127;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
static const float NOT_A_DISTANCE_FLOAT;
- static const int NOT_A_CODE = -1;
int getStartIndexFromCoordinates(const int x, const int y) const;
- void initializeCodeToKeyIndex();
+ void initializeG();
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const;
bool hasInputCoordinates() const;
- int getKeyIndex(const int c) const;
- bool hasSweetSpotData(const int keyIndex) const {
- // When there are no calibration data for a key,
- // the radius of the key is assigned to zero.
- return mSweetSpotRadii[keyIndex] > 0.0;
- }
- bool isOnKey(const int keyId, const int x, const int y) const;
int squaredDistanceToEdge(const int keyId, const int x, const int y) const;
- void calculateNearbyKeyCodes(
- const int x, const int y, const int32_t primaryKey, int *inputCodes) const;
+ bool isOnKey(const int keyId, const int x, const int y) const {
+ if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case
+ const int left = mKeyXCoordinates[keyId];
+ const int top = mKeyYCoordinates[keyId];
+ const int right = left + mKeyWidths[keyId] + 1;
+ const int bottom = top + mKeyHeights[keyId];
+ return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom;
+ }
const int MAX_PROXIMITY_CHARS_SIZE;
const int GRID_WIDTH;
const int GRID_HEIGHT;
+ const int MOST_COMMON_KEY_WIDTH;
const int MOST_COMMON_KEY_WIDTH_SQUARE;
const int CELL_WIDTH;
const int CELL_HEIGHT;
const int KEY_COUNT;
+ const int KEYBOARD_WIDTH;
+ const int KEYBOARD_HEIGHT;
const bool HAS_TOUCH_POSITION_CORRECTION_DATA;
- const std::string mLocaleStr;
- int32_t *mInputCodes;
- const int *mInputXCoordinates;
- const int *mInputYCoordinates;
- bool mTouchPositionCorrectionEnabled;
+ char mLocaleStr[MAX_LOCALE_STRING_LENGTH];
int32_t *mProximityCharsArray;
- int *mNormalizedSquaredDistances;
int32_t mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int32_t mKeyCharCodes[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int mInputLength;
- unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
- int mCodeToKeyIndex[MAX_CHAR_CODE + 1];
-};
+ hash_map_compat<int, int> mCodeToKeyMap;
+ int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
+ // TODO: move to correction.h
+};
} // namespace latinime
-
#endif // LATINIME_PROXIMITY_INFO_H
diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp
new file mode 100644
index 000000000..c5f2884c6
--- /dev/null
+++ b/native/jni/src/proximity_info_state.cpp
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring> // for memset()
+#include <stdint.h>
+
+#define LOG_TAG "LatinIME: proximity_info_state.cpp"
+
+#include "defines.h"
+#include "geometry_utils.h"
+#include "proximity_info.h"
+#include "proximity_info_state.h"
+
+namespace latinime {
+
+const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
+const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
+ 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
+const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
+const int ProximityInfoState::NOT_A_CODE = -1;
+
+void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
+ const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
+ const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+ const int *const pointerIds, const bool isGeometric) {
+
+ if (isGeometric) {
+ mIsContinuationPossible = checkAndReturnIsContinuationPossible(
+ inputSize, xCoordinates, yCoordinates, times);
+ } else {
+ mIsContinuationPossible = false;
+ }
+
+ mProximityInfo = proximityInfo;
+ mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData();
+ mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare();
+ mLocaleStr = proximityInfo->getLocaleStr();
+ mKeyCount = proximityInfo->getKeyCount();
+ mCellHeight = proximityInfo->getCellHeight();
+ mCellWidth = proximityInfo->getCellWidth();
+ mGridHeight = proximityInfo->getGridWidth();
+ mGridWidth = proximityInfo->getGridHeight();
+
+ memset(mInputCodes, 0, sizeof(mInputCodes));
+
+ if (!isGeometric && pointerId == 0) {
+ // Initialize
+ // - mInputCodes
+ // - mNormalizedSquaredDistances
+ // TODO: Merge
+ for (int i = 0; i < inputSize; ++i) {
+ const int32_t primaryKey = inputCodes[i];
+ const int x = xCoordinates[i];
+ const int y = yCoordinates[i];
+ int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
+ mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities);
+ }
+
+ if (DEBUG_PROXIMITY_CHARS) {
+ for (int i = 0; i < inputSize; ++i) {
+ AKLOGI("---");
+ for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) {
+ int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
+ int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
+ icc += 0;
+ icfjc += 0;
+ AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc);
+ }
+ }
+ }
+ }
+
+ ///////////////////////
+ // Setup touch points
+ int pushTouchPointStartIndex = 0;
+ int lastSavedInputSize = 0;
+ mMaxPointToKeyLength = maxPointToKeyLength;
+ if (mIsContinuationPossible && mInputIndice.size() > 1) {
+ // Just update difference.
+ // Two points prior is never skipped. Thus, we pop 2 input point data here.
+ pushTouchPointStartIndex = mInputIndice[mInputIndice.size() - 2];
+ popInputData();
+ popInputData();
+ lastSavedInputSize = mInputXs.size();
+ } else {
+ // Clear all data.
+ mInputXs.clear();
+ mInputYs.clear();
+ mTimes.clear();
+ mInputIndice.clear();
+ mLengthCache.clear();
+ mDistanceCache.clear();
+ mNearKeysVector.clear();
+ }
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d",
+ pushTouchPointStartIndex, lastSavedInputSize);
+ }
+ mInputSize = 0;
+
+ if (xCoordinates && yCoordinates) {
+ const bool proximityOnly = !isGeometric && (xCoordinates[0] < 0 || yCoordinates[0] < 0);
+ int lastInputIndex = pushTouchPointStartIndex;
+ for (int i = lastInputIndex; i < inputSize; ++i) {
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (pointerId == pid) {
+ lastInputIndex = i;
+ }
+ }
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: last input index = %d", lastInputIndex);
+ }
+ // Working space to save near keys distances for current, prev and prevprev input point.
+ NearKeysDistanceMap nearKeysDistances[3];
+ // These pointers are swapped for each inputs points.
+ NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0];
+ NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1];
+ NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2];
+
+ for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) {
+ // Assuming pointerId == 0 if pointerIds is null.
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
+ }
+ if (pointerId == pid) {
+ const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
+ const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
+ const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
+ const int time = times ? times[i] : -1;
+ if (pushTouchPoint(i, c, x, y, time, isGeometric /* do sampling */,
+ i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances)) {
+ // Previous point information was popped.
+ NearKeysDistanceMap *tmp = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ } else {
+ NearKeysDistanceMap *tmp = prevPrevNearKeysDistances;
+ prevPrevNearKeysDistances = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ }
+ }
+ }
+ mInputSize = mInputXs.size();
+ }
+
+ if (mInputSize > 0) {
+ const int keyCount = mProximityInfo->getKeyCount();
+ mNearKeysVector.resize(mInputSize);
+ mDistanceCache.resize(mInputSize * keyCount);
+ for (int i = lastSavedInputSize; i < mInputSize; ++i) {
+ mNearKeysVector[i].reset();
+ static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f;
+ for (int k = 0; k < keyCount; ++k) {
+ const int index = i * keyCount + k;
+ const int x = mInputXs[i];
+ const int y = mInputYs[i];
+ const float normalizedSquaredDistance =
+ mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y);
+ mDistanceCache[index] = normalizedSquaredDistance;
+ if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) {
+ mNearKeysVector[i].set(k, 1);
+ }
+ }
+ }
+
+ static const float READ_FORWORD_LENGTH_SCALE = 0.95f;
+ const int readForwordLength = static_cast<int>(
+ hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight())
+ * READ_FORWORD_LENGTH_SCALE);
+ for (int i = 0; i < mInputSize; ++i) {
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i],
+ mTimes[i]);
+ }
+ for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) {
+ if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) {
+ break;
+ }
+ mNearKeysVector[i] |= mNearKeysVector[j];
+ }
+ }
+ }
+
+ // end
+ ///////////////////////
+
+ memset(mNormalizedSquaredDistances, NOT_A_DISTANCE, sizeof(mNormalizedSquaredDistances));
+ memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
+ mTouchPositionCorrectionEnabled = mInputSize > 0 && mHasTouchPositionCorrectionData
+ && xCoordinates && yCoordinates && !isGeometric;
+ if (!isGeometric && pointerId == 0) {
+ for (int i = 0; i < inputSize; ++i) {
+ mPrimaryInputWord[i] = getPrimaryCharAt(i);
+ }
+
+ for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
+ const int *proximityChars = getProximityCharsAt(i);
+ const int primaryKey = proximityChars[0];
+ const int x = xCoordinates[i];
+ const int y = yCoordinates[i];
+ if (DEBUG_PROXIMITY_CHARS) {
+ int a = x + y + primaryKey;
+ a += 0;
+ AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
+ }
+ for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
+ const int currentChar = proximityChars[j];
+ const float squaredDistance =
+ hasInputCoordinates() ? calculateNormalizedSquaredDistance(
+ mProximityInfo->getKeyIndexOf(currentChar), i) :
+ NOT_A_DISTANCE_FLOAT;
+ if (squaredDistance >= 0.0f) {
+ mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
+ (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
+ } else {
+ mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
+ (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO :
+ PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
+ }
+ if (DEBUG_PROXIMITY_CHARS) {
+ AKLOGI("--- Proximity (%d) = %c", j, currentChar);
+ }
+ }
+ }
+ }
+
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("ProximityState init finished: %d points out of %d", mInputSize, inputSize);
+ }
+}
+
+bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSize,
+ const int *const xCoordinates, const int *const yCoordinates, const int *const times) {
+ for (int i = 0; i < mInputSize; ++i) {
+ const int index = mInputIndice[i];
+ if (index > inputSize || xCoordinates[index] != mInputXs[i] ||
+ yCoordinates[index] != mInputYs[i] || times[index] != mTimes[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Calculating point to key distance for all near keys and returning the distance between
+// the given point and the nearest key position.
+float ProximityInfoState::updateNearKeysDistances(const int x, const int y,
+ NearKeysDistanceMap *const currentNearKeysDistances) {
+ static const float NEAR_KEY_THRESHOLD = 4.0f;
+
+ currentNearKeysDistances->clear();
+ const int keyCount = mProximityInfo->getKeyCount();
+ float nearestKeyDistance = mMaxPointToKeyLength;
+ for (int k = 0; k < keyCount; ++k) {
+ const float dist = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y);
+ if (dist < NEAR_KEY_THRESHOLD) {
+ currentNearKeysDistances->insert(std::pair<int, float>(k, dist));
+ }
+ if (nearestKeyDistance > dist) {
+ nearestKeyDistance = dist;
+ }
+ }
+ return nearestKeyDistance;
+}
+
+// Check if previous point is at local minimum position to near keys.
+bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) const {
+ static const float MARGIN = 0.01f;
+
+ for (NearKeysDistanceMap::const_iterator it = prevNearKeysDistances->begin();
+ it != prevNearKeysDistances->end(); ++it) {
+ NearKeysDistanceMap::const_iterator itPP = prevPrevNearKeysDistances->find(it->first);
+ NearKeysDistanceMap::const_iterator itC = currentNearKeysDistances->find(it->first);
+ if ((itPP == prevPrevNearKeysDistances->end() || itPP->second > it->second + MARGIN)
+ && (itC == currentNearKeysDistances->end() || itC->second > it->second + MARGIN)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Calculating a point score that indicates usefulness of the point.
+float ProximityInfoState::getPointScore(
+ const int x, const int y, const int time, const bool lastPoint, const float nearest,
+ const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) const {
+ static const int DISTANCE_BASE_SCALE = 100;
+ static const int SAVE_DISTANCE_SCALE = 200;
+ static const int SKIP_DISTANCE_SCALE = 25;
+ static const int CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 40;
+ static const int STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 50;
+ static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 27;
+ static const float SAVE_DISTANCE_SCORE = 2.0f;
+ static const float SKIP_DISTANCE_SCORE = -1.0f;
+ static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f;
+ static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 36.0f;
+ static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f;
+ static const float STRAIGHT_SKIP_SCORE = -1.0f;
+ static const float CORNER_ANGLE_THRESHOLD = M_PI_F / 2.0f;
+ static const float CORNER_SCORE = 1.0f;
+
+ const std::size_t size = mInputXs.size();
+ if (size <= 1) {
+ return 0.0f;
+ }
+ const int baseSampleRate = mProximityInfo->getMostCommonKeyWidth();
+ const int distNext = getDistanceInt(x, y, mInputXs.back(), mInputYs.back())
+ * DISTANCE_BASE_SCALE;
+ const int distPrev = getDistanceInt(mInputXs.back(), mInputYs.back(),
+ mInputXs[size - 2], mInputYs[size - 2]) * DISTANCE_BASE_SCALE;
+ float score = 0.0f;
+
+ // Sum of distances
+ if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) {
+ score += SAVE_DISTANCE_SCORE;
+ }
+ // Distance
+ if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) {
+ score += SKIP_DISTANCE_SCORE;
+ }
+ // Location
+ if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) {
+ if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances)) {
+ score += CHECK_LOCALMIN_DISTANCE_SCORE;
+ }
+ }
+ // Angle
+ const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back());
+ const float angle2 = getAngle(mInputXs.back(), mInputYs.back(),
+ mInputXs[size - 2], mInputYs[size - 2]);
+ const float angleDiff = getAngleDiff(angle1, angle2);
+ // Skip straight
+ if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD
+ && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE
+ && angleDiff < STRAIGHT_ANGLE_THRESHOLD) {
+ score += STRAIGHT_SKIP_SCORE;
+ }
+ // Save corner
+ if (distPrev > baseSampleRate * CORNER_CHECK_DISTANCE_THRESHOLD_SCALE
+ && angleDiff > CORNER_ANGLE_THRESHOLD) {
+ score += CORNER_SCORE;
+ }
+ return score;
+}
+
+// Sampling touch point and pushing information to vectors.
+// Returning if previous point is popped or not.
+bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
+ const int time, const bool sample, const bool isLastPoint,
+ NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
+ static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
+
+ size_t size = mInputXs.size();
+ bool popped = false;
+ if (nodeChar < 0 && sample) {
+ const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
+ const float score = getPointScore(x, y, time, isLastPoint, nearest,
+ currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
+ if (score < 0) {
+ // Pop previous point because it would be useless.
+ popInputData();
+ size = mInputXs.size();
+ popped = true;
+ } else {
+ popped = false;
+ }
+ // Check if the last point should be skipped.
+ if (isLastPoint) {
+ if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back())
+ < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) {
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %f, "
+ "width = %f", size, x, y, mInputXs.back(), mInputYs.back(),
+ getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()),
+ mProximityInfo->getMostCommonKeyWidth()
+ * LAST_POINT_SKIP_DISTANCE_SCALE);
+ }
+ return popped;
+ } else if (size > 1) {
+ int minChar = 0;
+ float minDist = mMaxPointToKeyLength;
+ for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin();
+ it != currentNearKeysDistances->end(); ++it) {
+ if (minDist > it->second) {
+ minChar = it->first;
+ minDist = it->second;
+ }
+ }
+ NearKeysDistanceMap::const_iterator itPP =
+ prevNearKeysDistances->find(minChar);
+ if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) {
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("p1: char = %c, minDist = %f, prevNear key minDist = %f",
+ minChar, itPP->second, minDist);
+ }
+ return popped;
+ }
+ }
+ }
+ }
+
+ if (nodeChar >= 0 && (x < 0 || y < 0)) {
+ const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
+ if (keyId >= 0) {
+ x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
+ y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
+ }
+ }
+
+ // Pushing point information.
+ if (size > 0) {
+ mLengthCache.push_back(
+ mLengthCache.back() + getDistanceInt(x, y, mInputXs.back(), mInputYs.back()));
+ } else {
+ mLengthCache.push_back(0);
+ }
+ mInputXs.push_back(x);
+ mInputYs.push_back(y);
+ mTimes.push_back(time);
+ mInputIndice.push_back(inputIndex);
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("pushTouchPoint: x = %03d, y = %03d, time = %d, index = %d, popped ? %01d",
+ x, y, time, inputIndex, popped);
+ }
+ return popped;
+}
+
+float ProximityInfoState::calculateNormalizedSquaredDistance(
+ const int keyIndex, const int inputIndex) const {
+ if (keyIndex == NOT_AN_INDEX) {
+ return NOT_A_DISTANCE_FLOAT;
+ }
+ if (!mProximityInfo->hasSweetSpotData(keyIndex)) {
+ return NOT_A_DISTANCE_FLOAT;
+ }
+ if (NOT_A_COORDINATE == mInputXs[inputIndex]) {
+ return NOT_A_DISTANCE_FLOAT;
+ }
+ const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(
+ keyIndex, inputIndex);
+ const float squaredRadius = square(mProximityInfo->getSweetSpotRadiiAt(keyIndex));
+ return squaredDistance / squaredRadius;
+}
+
+int ProximityInfoState::getDuration(const int index) const {
+ if (mInputSize > 0 && index >= 0 && index < mInputSize - 1) {
+ return mTimes[index + 1] - mTimes[index];
+ }
+ return 0;
+}
+
+float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint,
+ const float scale) const {
+ const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
+ if (keyId != NOT_AN_INDEX) {
+ const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
+ return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
+ }
+ if (isSkippableChar(codePoint)) {
+ return 0;
+ }
+ // If the char is not a key on the keyboard then return the max length.
+ return MAX_POINT_TO_KEY_LENGTH;
+}
+
+int ProximityInfoState::getSpaceY() const {
+ const int keyId = mProximityInfo->getKeyIndexOf(' ');
+ return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
+}
+
+float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(
+ const int keyIndex, const int inputIndex) const {
+ const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex);
+ const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex);
+ const float inputX = static_cast<float>(mInputXs[inputIndex]);
+ const float inputY = static_cast<float>(mInputYs[inputIndex]);
+ return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY);
+}
+
+// Puts possible characters into filter and returns new filter size.
+int32_t ProximityInfoState::getAllPossibleChars(
+ const size_t index, int32_t *const filter, const int32_t filterSize) const {
+ if (index >= mInputXs.size()) {
+ return filterSize;
+ }
+ int newFilterSize = filterSize;
+ for (int j = 0; j < mProximityInfo->getKeyCount(); ++j) {
+ if (mNearKeysVector[index].test(j)) {
+ const int32_t keyCodePoint = mProximityInfo->getCodePointOf(j);
+ bool insert = true;
+ // TODO: Avoid linear search
+ for (int k = 0; k < filterSize; ++k) {
+ if (filter[k] == keyCodePoint) {
+ insert = false;
+ break;
+ }
+ }
+ if (insert) {
+ filter[newFilterSize++] = keyCodePoint;
+ }
+ }
+ }
+ return newFilterSize;
+}
+
+float ProximityInfoState::getAveragePointDuration() const {
+ if (mInputSize == 0) {
+ return 0.0f;
+ }
+ return static_cast<float>(mTimes[mInputSize - 1] - mTimes[0]) / static_cast<float>(mInputSize);
+}
+
+void ProximityInfoState::popInputData() {
+ mInputXs.pop_back();
+ mInputYs.pop_back();
+ mTimes.pop_back();
+ mLengthCache.pop_back();
+ mInputIndice.pop_back();
+}
+
+} // namespace latinime
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
new file mode 100644
index 000000000..48862a7c7
--- /dev/null
+++ b/native/jni/src/proximity_info_state.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_STATE_H
+#define LATINIME_PROXIMITY_INFO_STATE_H
+
+#include <bitset>
+#include <cstring> // for memset()
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "char_utils.h"
+#include "defines.h"
+#include "hash_map_compat.h"
+
+namespace latinime {
+
+class ProximityInfo;
+
+class ProximityInfoState {
+ public:
+ typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet;
+ static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
+ static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
+ static const float NOT_A_DISTANCE_FLOAT;
+ static const int NOT_A_CODE;
+
+ /////////////////////////////////////////
+ // Defined in proximity_info_state.cpp //
+ /////////////////////////////////////////
+ void initInputParams(const int pointerId, const float maxPointToKeyLength,
+ const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
+ const int inputSize, const int *xCoordinates, const int *yCoordinates,
+ const int *const times, const int *const pointerIds, const bool isGeometric);
+
+ /////////////////////////////////////////
+ // Defined here //
+ /////////////////////////////////////////
+ ProximityInfoState()
+ : mProximityInfo(0), mMaxPointToKeyLength(0),
+ mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(),
+ mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
+ mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(),
+ mDistanceCache(), mLengthCache(), mNearKeysVector(),
+ mTouchPositionCorrectionEnabled(false), mInputSize(0) {
+ memset(mInputCodes, 0, sizeof(mInputCodes));
+ memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances));
+ memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
+ }
+
+ virtual ~ProximityInfoState() {}
+
+ inline unsigned short getPrimaryCharAt(const int index) const {
+ return getProximityCharsAt(index)[0];
+ }
+
+ inline bool existsCharInProximityAt(const int index, const int c) const {
+ const int *chars = getProximityCharsAt(index);
+ int i = 0;
+ while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
+ if (chars[i++] == c) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ inline bool existsAdjacentProximityChars(const int index) const {
+ if (index < 0 || index >= mInputSize) return false;
+ const int currentChar = getPrimaryCharAt(index);
+ const int leftIndex = index - 1;
+ if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
+ return true;
+ }
+ const int rightIndex = index + 1;
+ if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
+ return true;
+ }
+ return false;
+ }
+
+ // In the following function, c is the current character of the dictionary word
+ // currently examined.
+ // currentChars is an array containing the keys close to the character the
+ // user actually typed at the same position. We want to see if c is in it: if so,
+ // then the word contains at that position a character close to what the user
+ // typed.
+ // What the user typed is actually the first character of the array.
+ // proximityIndex is a pointer to the variable where getMatchedProximityId returns
+ // the index of c in the proximity chars of the input index.
+ // Notice : accented characters do not have a proximity list, so they are alone
+ // in their list. The non-accented version of the character should be considered
+ // "close", but not the other keys close to the non-accented version.
+ inline ProximityType getMatchedProximityId(const int index,
+ const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
+ const int *currentChars = getProximityCharsAt(index);
+ const int firstChar = currentChars[0];
+ const unsigned short baseLowerC = toBaseLowerCase(c);
+
+ // The first char in the array is what user typed. If it matches right away,
+ // that means the user typed that same char for this pos.
+ if (firstChar == baseLowerC || firstChar == c) {
+ return EQUIVALENT_CHAR;
+ }
+
+ if (!checkProximityChars) return UNRELATED_CHAR;
+
+ // If the non-accented, lowercased version of that first character matches c,
+ // then we have a non-accented version of the accented character the user
+ // typed. Treat it as a close char.
+ if (toBaseLowerCase(firstChar) == baseLowerC)
+ return NEAR_PROXIMITY_CHAR;
+
+ // Not an exact nor an accent-alike match: search the list of close keys
+ int j = 1;
+ while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
+ }
+ return NEAR_PROXIMITY_CHAR;
+ }
+ ++j;
+ }
+ if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ ++j;
+ while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
+ }
+ return ADDITIONAL_PROXIMITY_CHAR;
+ }
+ ++j;
+ }
+ }
+
+ // Was not included, signal this as an unrelated character.
+ return UNRELATED_CHAR;
+ }
+
+ inline int getNormalizedSquaredDistance(
+ const int inputIndex, const int proximityIndex) const {
+ return mNormalizedSquaredDistances[
+ inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
+ }
+
+ inline const unsigned short *getPrimaryInputWord() const {
+ return mPrimaryInputWord;
+ }
+
+ inline bool touchPositionCorrectionEnabled() const {
+ return mTouchPositionCorrectionEnabled;
+ }
+
+ inline bool sameAsTyped(const unsigned short *word, int length) const {
+ if (length != mInputSize) {
+ return false;
+ }
+ const int *inputCodes = mInputCodes;
+ while (length--) {
+ if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
+ return false;
+ }
+ inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
+ word++;
+ }
+ return true;
+ }
+
+ int getDuration(const int index) const;
+
+ bool isUsed() const {
+ return mInputSize > 0;
+ }
+
+ uint32_t size() const {
+ return mInputSize;
+ }
+
+ int getInputX(const int index) const {
+ return mInputXs[index];
+ }
+
+ int getInputY(const int index) const {
+ return mInputYs[index];
+ }
+
+ int getLengthCache(const int index) const {
+ return mLengthCache[index];
+ }
+
+ bool isContinuationPossible() const {
+ return mIsContinuationPossible;
+ }
+
+ float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const;
+
+ int getSpaceY() const;
+
+ int32_t getAllPossibleChars(
+ const size_t startIndex, int32_t *const filter, const int32_t filterSize) const;
+
+ float getAveragePointDuration() const;
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
+ typedef hash_map_compat<int, float> NearKeysDistanceMap;
+ /////////////////////////////////////////
+ // Defined in proximity_info_state.cpp //
+ /////////////////////////////////////////
+ float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
+
+ float calculateSquaredDistanceFromSweetSpotCenter(
+ const int keyIndex, const int inputIndex) const;
+
+ bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
+ const bool sample, const bool isLastPoint,
+ NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances);
+ /////////////////////////////////////////
+ // Defined here //
+ /////////////////////////////////////////
+ inline float square(const float x) const { return x * x; }
+
+ bool hasInputCoordinates() const {
+ return mInputXs.size() > 0 && mInputYs.size() > 0;
+ }
+
+ inline const int *getProximityCharsAt(const int index) const {
+ return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
+ }
+
+ float updateNearKeysDistances(const int x, const int y,
+ NearKeysDistanceMap *const currentNearKeysDistances);
+ bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
+ float getPointScore(
+ const int x, const int y, const int time, const bool last, const float nearest,
+ const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
+ bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *const times);
+ void popInputData();
+
+ // const
+ const ProximityInfo *mProximityInfo;
+ float mMaxPointToKeyLength;
+ bool mHasTouchPositionCorrectionData;
+ int mMostCommonKeyWidthSquare;
+ std::string mLocaleStr;
+ int mKeyCount;
+ int mCellHeight;
+ int mCellWidth;
+ int mGridHeight;
+ int mGridWidth;
+ bool mIsContinuationPossible;
+
+ std::vector<int> mInputXs;
+ std::vector<int> mInputYs;
+ std::vector<int> mTimes;
+ std::vector<int> mInputIndice;
+ std::vector<float> mDistanceCache;
+ std::vector<int> mLengthCache;
+ std::vector<NearKeycodesSet> mNearKeysVector;
+ bool mTouchPositionCorrectionEnabled;
+ int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
+ int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
+ int mInputSize;
+ unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_STATE_H
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index 9a803cca1..53ae385ea 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
#define LATINIME_TERMINAL_ATTRIBUTES_H
-#include "unigram_dictionary.h"
+#include "binary_format.h"
namespace latinime {
@@ -29,14 +29,14 @@ namespace latinime {
class TerminalAttributes {
public:
class ShortcutIterator {
- const uint8_t* const mDict;
- bool mHasNextShortcutTarget;
+ const uint8_t *const mDict;
int mPos;
+ bool mHasNextShortcutTarget;
public:
- ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
- mPos(pos) {
- mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags)
+ : mDict(dict), mPos(pos),
+ mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
}
inline bool hasNextShortcutTarget() const {
@@ -46,29 +46,24 @@ class TerminalAttributes {
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range.
- inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget =
- 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
- const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos);
- if (NOT_A_CHARACTER == charCode) break;
- outWord[i] = (uint16_t)charCode;
+ const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
+ if (NOT_A_CODE_POINT == codePoint) break;
+ outWord[i] = (uint16_t)codePoint;
}
+ *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
return i;
}
};
- private:
- const uint8_t* const mDict;
- const uint8_t mFlags;
- const int mStartPos;
-
- public:
- TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
- mDict(dict), mFlags(flags), mStartPos(pos) {
+ TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos)
+ : mDict(dict), mFlags(flags), mStartPos(pos) {
}
inline ShortcutIterator getShortcutIterator() const {
@@ -76,7 +71,16 @@ class TerminalAttributes {
// skipped quickly, so we ignore it.
return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
}
+
+ bool isBlacklistedOrNotAWord() const {
+ return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
+ const uint8_t *const mDict;
+ const uint8_t mFlags;
+ const int mStartPos;
};
} // namespace latinime
-
#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index ea9f11b2c..49d044fbc 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -1,32 +1,33 @@
/*
-**
-** Copyright 2010, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
-
-#include <assert.h>
-#include <string.h>
+ * Copyright (C) 2010, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <cstring>
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
+#include "binary_format.h"
#include "char_utils.h"
#include "defines.h"
#include "dictionary.h"
-#include "unigram_dictionary.h"
-
-#include "binary_format.h"
+#include "proximity_info.h"
#include "terminal_attributes.h"
+#include "unigram_dictionary.h"
+#include "words_priority_queue.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
@@ -40,7 +41,7 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[
{ 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
// TODO: check the header
-UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
+UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags)
: DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
@@ -57,18 +58,18 @@ UnigramDictionary::~UnigramDictionary() {
}
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
- return sizeof(*codes) * codesSize;
+ return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
}
// TODO: This needs to take a const unsigned short* and not tinker with its contents
-static inline void addWord(
- unsigned short *word, int length, int frequency, WordsPriorityQueue *queue) {
- queue->push(frequency, word, length);
+static inline void addWord(unsigned short *word, int length, int frequency,
+ WordsPriorityQueue *queue, int type) {
+ queue->push(frequency, word, length, type);
}
// Return the replacement code point for a digraph, or 0 if none.
int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int codesSize,
- const digraph_t* const digraphs, const unsigned int digraphsSize) const {
+ const digraph_t *const digraphs, const unsigned int digraphsSize) const {
// There can't be a digraph if we don't have at least 2 characters to examine
if (i + 2 > codesSize) return false;
@@ -103,9 +104,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
const bool useFullEditDistance, const int *codesSrc,
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool,
- const digraph_t* const digraphs, const unsigned int digraphsSize) {
+ const digraph_t *const digraphs, const unsigned int digraphsSize) const {
- const int startIndex = codesDest - codesBuffer;
+ const int startIndex = static_cast<int>(codesDest - codesBuffer);
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
@@ -169,15 +170,16 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// bigramMap contains the association <bigram address> -> <bigram frequency>
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
// in bigram_dictionary.cpp
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
- WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
+ const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
+ int *outputTypes) const {
- queuePool->clearAll();
- Correction* masterCorrection = correction;
- correction->resetCorrection();
+ WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
+ queuePool.clearAll();
+ Correction masterCorrection;
+ masterCorrection.resetCorrection();
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
{ // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize)];
@@ -185,8 +187,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
int yCoordinatesBuffer[codesSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
- useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
- queuePool, GERMAN_UMLAUT_DIGRAPHS,
+ useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
+ &queuePool, GERMAN_UMLAUT_DIGRAPHS,
sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
} else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
int codesBuffer[getCodesBufferSize(codes, codesSize)];
@@ -194,36 +196,36 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
int yCoordinatesBuffer[codesSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
- useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
- queuePool, FRENCH_LIGATURES_DIGRAPHS,
+ useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
+ &queuePool, FRENCH_LIGATURES_DIGRAPHS,
sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
} else { // Normal processing
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
- bigramMap, bigramFilter, useFullEditDistance, masterCorrection, queuePool);
+ bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
}
PROF_START(20);
if (DEBUG_DICT) {
- float ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0);
+ float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
+ masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0;
AKLOGI("Max normalized score = %f", ns);
}
const int suggestedWordsCount =
- queuePool->getMasterQueue()->outputSuggestions(
- proximityInfo->getPrimaryInputWord(), codesSize, frequencies, outWords);
+ queuePool.getMasterQueue()->outputSuggestions(masterCorrection.getPrimaryInputWord(),
+ codesSize, frequencies, outWords, outputTypes);
if (DEBUG_DICT) {
- float ns = queuePool->getMasterQueue()->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0);
+ float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
+ masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0);
ns += 0;
AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
- short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
+ short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- (void)s;
+ (void)s; // To suppress compiler warning
AKLOGI("%s %i", s, frequencies[j]);
}
}
@@ -234,8 +236,9 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) {
+ const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, Correction *correction,
+ WordsPriorityQueuePool *queuePool) const {
PROF_OPEN;
PROF_START(0);
@@ -243,7 +246,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(1);
getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter,
- useFullEditDistance, inputLength, correction, queuePool);
+ useFullEditDistance, inputSize, correction, queuePool);
PROF_END(1);
PROF_START(2);
@@ -256,10 +259,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(4);
bool hasAutoCorrectionCandidate = false;
- WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
if (masterQueue->size() > 0) {
float nsForMaster = masterQueue->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
+ correction->getPrimaryInputWord(), inputSize, 0, 0, 0);
hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD);
}
PROF_END(4);
@@ -267,9 +270,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(5);
// Multiple word suggestions
if (SUGGEST_MULTIPLE_WORDS
- && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
+ && inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, correction, queuePool,
+ useFullEditDistance, inputSize, correction, queuePool,
hasAutoCorrectionCandidate);
}
PROF_END(5);
@@ -281,18 +284,18 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
queuePool->dumpSubQueue1TopSuggestions();
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i);
+ WordsPriorityQueue *queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i);
if (queue->size() > 0) {
- WordsPriorityQueue::SuggestedWord* sw = queue->top();
+ WordsPriorityQueue::SuggestedWord *sw = queue->top();
const int score = sw->mScore;
- const unsigned short* word = sw->mWord;
+ const unsigned short *word = sw->mWord;
const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
- proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
+ correction->getPrimaryInputWord(), i, word, wordLength, score);
ns += 0;
AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
(ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
- DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
+ DUMP_WORD(correction->getPrimaryInputWord(), i);
DUMP_WORD(word, wordLength);
}
}
@@ -300,33 +303,33 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
+ const int *yCoordinates, const int *codes, const int inputSize,
+ Correction *correction) const {
if (DEBUG_DICT) {
AKLOGI("initSuggest");
- DUMP_WORD_INT(codes, inputLength);
+ DUMP_WORD_INT(codes, inputSize);
}
- proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
- const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
- correction->initCorrection(proximityInfo, inputLength, maxDepth);
+ correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
+ const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+ correction->initCorrection(proximityInfo, inputSize, maxDepth);
}
-static const char QUOTE = '\'';
static const char SPACE = ' ';
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool *queuePool) {
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
- getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction,
+ const bool useFullEditDistance, const int inputSize,
+ Correction *correction, WordsPriorityQueuePool *queuePool) const {
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction);
+ getSuggestionCandidates(useFullEditDistance, inputSize, bigramMap, bigramFilter, correction,
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
}
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
- const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool,
- const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
+ const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const {
uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
if (DEBUG_DICT) {
AKLOGI("Traverse count %d", totalTraverseCount);
@@ -346,7 +349,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition);
int outputIndex = 0;
- correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0));
+ correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0));
// Depth first search
while (outputIndex >= 0) {
@@ -374,38 +377,55 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
inline void UnigramDictionary::onTerminal(const int probability,
const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
- const int currentWordIndex) {
+ const int currentWordIndex) const {
const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength;
- unsigned short* wordPointer;
+ unsigned short *wordPointer;
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength);
- if (finalProbability != NOT_A_PROBABILITY) {
- addWord(wordPointer, wordLength, finalProbability, masterQueue);
-
- const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
- // Please note that the shortcut candidates will be added to the master queue only.
- TerminalAttributes::ShortcutIterator iterator =
- terminalAttributes.getShortcutIterator();
- while (iterator.hasNextShortcutTarget()) {
- // TODO: addWord only supports weak ordering, meaning we have no means
- // to control the order of the shortcuts relative to one another or to the word.
- // We need to either modulate the probability of each shortcut according
- // to its own shortcut probability or to make the queue
- // so that the insert order is protected inside the queue for words
- // with the same score. For the moment we use -1 to make sure the shortcut will
- // never be in front of the word.
- uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
- const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
- addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
- masterQueue);
+
+ if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
+ // If the probability is 0, we don't want to add this word. However we still
+ // want to add its shortcuts (including a possible whitelist entry) if any.
+ // Furthermore, if this is not a word (shortcut only for example) or a blacklisted
+ // entry then we never want to suggest this.
+ addWord(wordPointer, wordLength, finalProbability, masterQueue,
+ Dictionary::KIND_CORRECTION);
+ }
+
+ const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
+ // Please note that the shortcut candidates will be added to the master queue only.
+ TerminalAttributes::ShortcutIterator iterator =
+ terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // TODO: addWord only supports weak ordering, meaning we have no means
+ // to control the order of the shortcuts relative to one another or to the word.
+ // We need to either modulate the probability of each shortcut according
+ // to its own shortcut probability or to make the queue
+ // so that the insert order is protected inside the queue for words
+ // with the same score. For the moment we use -1 to make sure the shortcut will
+ // never be in front of the word.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ int shortcutFrequency;
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
+ int shortcutScore;
+ int kind;
+ if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
+ && correction->sameAsTyped()) {
+ shortcutScore = S_INT_MAX;
+ kind = Dictionary::KIND_WHITELIST;
+ } else {
+ shortcutScore = shortcutProbability;
+ kind = Dictionary::KIND_CORRECTION;
}
+ addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
+ masterQueue, kind);
}
}
@@ -419,18 +439,18 @@ inline void UnigramDictionary::onTerminal(const int probability,
}
const int finalProbability = correction->getFinalProbabilityForSubQueue(
probability, &wordPointer, &wordLength, inputIndex);
- addWord(wordPointer, wordLength, finalProbability, subQueue);
+ addWord(wordPointer, wordLength, finalProbability, subQueue, Dictionary::KIND_CORRECTION);
}
}
int UnigramDictionary::getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool* queuePool, const int inputLength,
+ WordsPriorityQueuePool *queuePool, const int inputSize,
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
+ int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
}
@@ -473,17 +493,18 @@ int UnigramDictionary::getSubStringSuggestion(
// TODO: Remove the safety net above //
//////////////////////////////////////////////
- unsigned short* tempOutputWord = 0;
+ unsigned short *tempOutputWord = 0;
int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- inputLength, correction);
+ inputSize, correction);
+ unsigned short word[MAX_WORD_LENGTH_INTERNAL];
int freq = getMostFrequentWordLike(
- inputWordStartPos, inputWordLength, proximityInfo, mWord);
+ inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) {
nextWordLength = inputWordLength;
- tempOutputWord = mWord;
+ tempOutputWord = word;
} else if (!hasAutoCorrectionCandidate) {
if (inputWordStartPos > 0) {
const int offset = inputWordStartPos;
@@ -503,14 +524,14 @@ int UnigramDictionary::getSubStringSuggestion(
}
}
}
- WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
+ WordsPriorityQueue *queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
// TODO: Return the correct value depending on doAutoCompletion
if (!queue || queue->size() <= 0) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
}
int score = 0;
const float ns = queue->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), inputWordLength,
+ correction->getPrimaryInputWord(), inputWordLength,
&tempOutputWord, &score, &nextWordLength);
if (DEBUG_DICT) {
AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
@@ -524,9 +545,9 @@ int UnigramDictionary::getSubStringSuggestion(
freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
}
if (DEBUG_DICT) {
- AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
- , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
- wordLengthArray[0]);
+ AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)",
+ currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
+ (currentWordIndex > 0) ? wordLengthArray[0] : 0);
}
if (freq <= 0 || nextWordLength <= 0
|| MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
@@ -545,7 +566,7 @@ int UnigramDictionary::getSubStringSuggestion(
*outputWordLength = tempOutputWordLength;
}
- if ((inputWordStartPos + inputWordLength) < inputLength) {
+ if ((inputWordStartPos + inputWordLength) < inputSize) {
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_SKIP;
}
@@ -564,31 +585,31 @@ int UnigramDictionary::getSubStringSuggestion(
freqArray[i], wordLengthArray[i]);
}
AKLOGI("Split two words: freq = %d, length = %d, %d, isSpace ? %d", pairFreq,
- inputLength, tempOutputWordLength, isSpaceProximity);
+ inputSize, tempOutputWordLength, isSpaceProximity);
}
- addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
+ addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue(),
+ Dictionary::KIND_CORRECTION);
}
return FLAG_MULTIPLE_SUGGEST_CONTINUE;
}
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool* queuePool,
- const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
- const int outputWordLength, int *freqArray, int* wordLengthArray,
- unsigned short* outputWord) {
+ const bool useFullEditDistance, const int inputSize, Correction *correction,
+ WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
+ const int startInputPos, const int startWordIndex, const int outputWordLength,
+ int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index
return;
}
if (startWordIndex >= 1
&& (hasAutoCorrectionCandidate
- || inputLength < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) {
+ || inputSize < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) {
// Do not suggest 3+ words if already has auto correction candidate
return;
}
- for (int i = startInputPos + 1; i < inputLength; ++i) {
+ for (int i = startInputPos + 1; i < inputSize; ++i) {
if (DEBUG_CORRECTION_FREQ) {
AKLOGI("Multi words(%d), start in %d sep %d start out %d",
startWordIndex, startInputPos, i, outputWordLength);
@@ -599,7 +620,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
int inputWordStartPos = startInputPos;
int inputWordLength = i - startInputPos;
const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates,
- codes, useFullEditDistance, correction, queuePool, inputLength,
+ codes, useFullEditDistance, correction, queuePool, inputSize,
hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,
outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord,
&tempOutputWordLength);
@@ -616,14 +637,14 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
// Next word
// Missing space
inputWordStartPos = i;
- inputWordLength = inputLength - i;
- if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ inputWordLength = inputSize - i;
+ if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
!= FLAG_MULTIPLE_SUGGEST_CONTINUE) {
getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, correction, queuePool,
+ useFullEditDistance, inputSize, correction, queuePool,
hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,
tempOutputWordLength, freqArray, wordLengthArray, outputWord);
}
@@ -646,7 +667,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
AKLOGI("Do mistyped space correction");
}
getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0);
}
@@ -654,10 +675,10 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool* queuePool,
- const bool hasAutoCorrectionCandidate) {
- if (inputLength >= MAX_WORD_LENGTH) return;
+ const bool useFullEditDistance, const int inputSize,
+ Correction *correction, WordsPriorityQueuePool *queuePool,
+ const bool hasAutoCorrectionCandidate) const {
+ if (inputSize >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) {
AKLOGI("--- Suggest multiple words");
}
@@ -670,7 +691,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
const int startInputPos = 0;
const int startWordIndex = 0;
getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
+ useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate,
startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
outputWord);
}
@@ -678,13 +699,13 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) {
- uint16_t inWord[inputLength];
+ const int inputSize, Correction *correction, unsigned short *word) const {
+ uint16_t inWord[inputSize];
- for (int i = 0; i < inputLength; ++i) {
- inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i);
+ for (int i = 0; i < inputSize; ++i) {
+ inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
}
- return getMostFrequentWordLikeInner(inWord, inputLength, word);
+ return getMostFrequentWordLikeInner(inWord, inputSize, word);
}
// This function will take the position of a character array within a CharGroup,
@@ -700,13 +721,13 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
- const uint8_t* const root, const int startPos,
- const uint16_t* const inWord, const int startInputIndex,
- int32_t* outNewWord, int* outInputIndex, int* outPos) {
- const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
+ const uint8_t *const root, const int startPos, const uint16_t *const inWord,
+ const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
+ int *outPos) {
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
- int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- int32_t baseChar = toBaseLowerCase(character);
+ int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ int32_t baseChar = toBaseLowerCase(codePoint);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
@@ -715,18 +736,18 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
return false;
}
int inputIndex = startInputIndex;
- outNewWord[inputIndex] = character;
+ outNewWord[inputIndex] = codePoint;
if (hasMultipleChars) {
- character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- while (NOT_A_CHARACTER != character) {
- baseChar = toBaseLowerCase(character);
- if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+ codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ while (NOT_A_CODE_POINT != codePoint) {
+ baseChar = toBaseLowerCase(codePoint);
+ if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex;
return false;
}
- outNewWord[inputIndex] = character;
- character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+ outNewWord[inputIndex] = codePoint;
+ codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
}
*outInputIndex = inputIndex + 1;
@@ -738,11 +759,12 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
// It will compare the frequency to the max frequency, and if greater, will
// copy the word into the output buffer. In output value maxFreq, it will
// write the new maximum frequency if it changed.
-static inline void onTerminalWordLike(const int freq, int32_t* newWord, const int length,
- short unsigned int* outWord, int* maxFreq) {
+static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
+ short unsigned int *outWord, int *maxFreq) {
if (freq > *maxFreq) {
- for (int q = 0; q < length; ++q)
+ for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q];
+ }
outWord[length] = 0;
*maxFreq = freq;
}
@@ -750,30 +772,33 @@ static inline void onTerminalWordLike(const int freq, int32_t* newWord, const in
// Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents.
-int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWord,
- const int length, short unsigned int* outWord) {
+int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
+ const int inputSize, short unsigned int *outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0;
int maxFreq = -1;
- const uint8_t* const root = DICT_ROOT;
+ const uint8_t *const root = DICT_ROOT;
+ int stackChildCount[MAX_WORD_LENGTH_INTERNAL];
+ int stackInputIndex[MAX_WORD_LENGTH_INTERNAL];
+ int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
int startPos = 0;
- mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
- mStackInputIndex[0] = 0;
- mStackSiblingPos[0] = startPos;
+ stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
+ stackInputIndex[0] = 0;
+ stackSiblingPos[0] = startPos;
while (depth >= 0) {
- const int charGroupCount = mStackChildCount[depth];
- int pos = mStackSiblingPos[depth];
+ const int charGroupCount = stackChildCount[depth];
+ int pos = stackSiblingPos[depth];
for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) {
- int inputIndex = mStackInputIndex[depth];
+ int inputIndex = stackInputIndex[depth];
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
// Test whether all chars in this group match with the word we are searching for. If so,
- // we want to traverse its children (or if the length match, evaluate its frequency).
+ // we want to traverse its children (or if the inputSize match, evaluate its frequency).
// Note that this function will output the position regardless, but will only write
// into inputIndex if there is a match.
const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
- inputIndex, newWord, &inputIndex, &pos);
- if (isAlike && (FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
+ inputIndex, inputSize, newWord, &inputIndex, &pos);
+ if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
}
@@ -782,18 +807,18 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
// If we had a match and the word has children, we want to traverse them. We don't have
// to traverse words longer than the one we are searching for, since they will not match
- // anyway, so don't traverse unless inputIndex < length.
- if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) {
+ // anyway, so don't traverse unless inputIndex < inputSize.
+ if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
// Save position for this depth, to get back to this once children are done
- mStackChildCount[depth] = charGroupIndex;
- mStackSiblingPos[depth] = siblingPos;
+ stackChildCount[depth] = charGroupIndex;
+ stackSiblingPos[depth] = siblingPos;
// Prepare stack values for next depth
++depth;
int childrenPos = childrenNodePos;
- mStackChildCount[depth] =
+ stackChildCount[depth] =
BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos);
- mStackSiblingPos[depth] = childrenPos;
- mStackInputIndex[depth] = inputIndex;
+ stackSiblingPos[depth] = childrenPos;
+ stackInputIndex[depth] = inputIndex;
pos = childrenPos;
// Go to the next depth level.
++depth;
@@ -808,18 +833,25 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
return maxFreq;
}
-int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
- const uint8_t* const root = DICT_ROOT;
- int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
+int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
+ const uint8_t *const root = DICT_ROOT;
+ int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
+ false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == pos) {
return NOT_A_PROBABILITY;
}
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no frequency outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos);
} else {
- BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
+ BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
}
const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
return unigramFreq;
@@ -848,7 +880,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
- WordsPriorityQueuePool *queuePool, const int currentWordIndex) {
+ WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -863,8 +895,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children)
// - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos);
- const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
- const bool isTerminalNode = (0 != (FLAG_IS_TERMINAL & flags));
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
+ const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
bool needsToInvokeOnTerminal = false;
@@ -873,23 +905,23 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
- int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
- assert(NOT_A_CHARACTER != c);
+ int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+ assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
// node each time. To do that, we will process characters in this node in order until
- // we find the character terminator. This is signalled by getCharCode* returning
- // NOT_A_CHARACTER.
+ // we find the character terminator. This is signalled by getCodePoint* returning
+ // NOT_A_CODE_POINT.
// As a special case, if there is only one character in this node, we must not read the
- // next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags.
+ // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
// This way, each loop run will look like a "virtual node".
do {
// We prefetch the next char. If 'c' is the last char of this node, we will have
- // NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node
+ // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
const int32_t nextc = hasMultipleChars
- ? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER;
- const bool isLastChar = (NOT_A_CHARACTER == nextc);
+ ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
+ const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this nodes, then this virtual node is not a terminal.
// If we are on the last char, this virtual node is a terminal if this node is.
const bool isTerminal = isLastChar && isTerminalNode;
@@ -918,9 +950,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// Prepare for the next character. Promote the prefetched char to current char - the loop
// will take care of prefetching the next. If we finally found our last char, nextc will
- // contain NOT_A_CHARACTER.
+ // contain NOT_A_CODE_POINT.
c = nextc;
- } while (NOT_A_CHARACTER != c);
+ } while (NOT_A_CODE_POINT != c);
if (isTerminalNode) {
// The frequency should be here, because we come here only if this is actually
@@ -982,5 +1014,4 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
*newChildrenPosition = childrenPos;
return true;
}
-
} // namespace latinime
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index a1a8299e5..57129bb07 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -19,53 +19,19 @@
#include <map>
#include <stdint.h>
-#include "correction.h"
-#include "correction_state.h"
#include "defines.h"
-#include "proximity_info.h"
-#include "words_priority_queue.h"
-#include "words_priority_queue_pool.h"
namespace latinime {
+class Correction;
+class ProximityInfo;
class TerminalAttributes;
+class WordsPriorityQueuePool;
+
class UnigramDictionary {
typedef struct { int first; int second; int replacement; } digraph_t;
public:
- // Mask and flags for children address type selection.
- static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
- static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
- static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
- static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
- static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
-
- // Flag for single/multiple char group
- static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
-
- // Flag for terminal groups
- static const int FLAG_IS_TERMINAL = 0x10;
-
- // Flag for shortcut targets presence
- static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
- // Flag for bigram presence
- static const int FLAG_HAS_BIGRAMS = 0x04;
-
- // Attribute (bigram/shortcut) related flags:
- // Flag for presence of more attributes
- static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
- // Flag for sign of offset. If this flag is set, the offset value must be negated.
- static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-
- // Mask for attribute frequency, stored on 4 bits inside the flags byte.
- static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
-
- // Mask and flags for attribute address type selection.
- static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-
// Error tolerances
static const int DEFAULT_MAX_ERRORS = 2;
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
@@ -73,80 +39,80 @@ class UnigramDictionary {
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
- UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
+ UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
- int getFrequency(const int32_t* const inWord, const int length) const;
+ int getFrequency(const int32_t *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
- int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
- Correction *correction, const int *xcoordinates, const int *ycoordinates,
- const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
- const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
- int *frequencies);
+ int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const int codesSize,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
+ int *outputTypes) const;
virtual ~UnigramDictionary();
private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int inputLength,
+ const int *ycoordinates, const int *codes, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool);
+ WordsPriorityQueuePool *queuePool) const;
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
- const digraph_t* const digraphs, const unsigned int digraphsSize) const;
+ const digraph_t *const digraphs, const unsigned int digraphsSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
+ const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, Correction *correction,
- WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
- const unsigned int digraphsSize);
+ const bool useFullEditDistance, const int *codesSrc, const int codesRemain,
+ const int currentDepth, int *codesDest, Correction *correction,
+ WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
+ const unsigned int digraphsSize) const;
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
+ const int *ycoordinates, const int *codes, const int codesSize,
+ Correction *correction) const;
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
- const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool* queuePool);
+ const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize,
+ Correction *correction, WordsPriorityQueuePool *queuePool) const;
void getSuggestionCandidates(
- const bool useFullEditDistance, const int inputLength,
+ const bool useFullEditDistance, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
- const int maxErrors, const int currentWordIndex);
+ Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
+ const int maxErrors, const int currentWordIndex) const;
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool* queuePool,
- const bool hasAutoCorrectionCandidate);
+ const bool useFullEditDistance, const int inputSize,
+ Correction *correction, WordsPriorityQueuePool *queuePool,
+ const bool hasAutoCorrectionCandidate) const;
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
- const int currentWordIndex);
- bool needsToSkipCurrentNode(const unsigned short c,
- const int inputIndex, const int skipPos, const int depth);
+ const int currentWordIndex) const;
// Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, Correction *correction, int *newCount,
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
- const int currentWordIndex);
- int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
- ProximityInfo *proximityInfo, unsigned short *word);
- int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
- short unsigned int *outWord);
+ const int currentWordIndex) const;
+ int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
+ Correction *correction, unsigned short *word) const;
+ int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
+ short unsigned int *outWord) const;
int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool* queuePool, const int inputLength,
+ WordsPriorityQueuePool *queuePool, const int inputSize,
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
+ int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength,
- Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool useFullEditDistance, const int inputSize,
+ Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
- const int outputWordLength, int *freqArray, int* wordLengthArray,
- unsigned short* outputWord);
+ const int outputWordLength, int *freqArray, int *wordLengthArray,
+ unsigned short *outputWord) const;
- const uint8_t* const DICT_ROOT;
+ const uint8_t *const DICT_ROOT;
const int MAX_WORD_LENGTH;
const int MAX_WORDS;
const int TYPED_LETTER_MULTIPLIER;
@@ -158,13 +124,6 @@ class UnigramDictionary {
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
-
- // Still bundled members
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
- int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
- int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
- int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
};
} // namespace latinime
-
#endif // LATINIME_UNIGRAM_DICTIONARY_H
diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h
index 7629251d6..19efa5da3 100644
--- a/native/jni/src/words_priority_queue.h
+++ b/native/jni/src/words_priority_queue.h
@@ -18,8 +18,9 @@
#define LATINIME_WORDS_PRIORITY_QUEUE_H
#include <cstring> // for memcpy()
-#include <iostream>
#include <queue>
+
+#include "correction.h"
#include "defines.h"
namespace latinime {
@@ -32,31 +33,32 @@ class WordsPriorityQueue {
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mWordLength;
bool mUsed;
+ int mType;
- void setParams(int score, unsigned short* word, int wordLength) {
+ void setParams(int score, unsigned short *word, int wordLength, int type) {
mScore = score;
mWordLength = wordLength;
memcpy(mWord, word, sizeof(unsigned short) * wordLength);
mUsed = true;
+ mType = type;
}
};
- WordsPriorityQueue(int maxWords, int maxWordLength) :
- MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH(
- (unsigned int) maxWordLength) {
- mSuggestedWords = new SuggestedWord[maxWordLength];
+ WordsPriorityQueue(int maxWords, int maxWordLength)
+ : mSuggestions(), MAX_WORDS(static_cast<unsigned int>(maxWords)),
+ MAX_WORD_LENGTH(static_cast<unsigned int>(maxWordLength)),
+ mSuggestedWords(new SuggestedWord[maxWordLength]), mHighestSuggestedWord(0) {
for (int i = 0; i < maxWordLength; ++i) {
mSuggestedWords[i].mUsed = false;
}
- mHighestSuggestedWord = 0;
}
- ~WordsPriorityQueue() {
+ virtual ~WordsPriorityQueue() {
delete[] mSuggestedWords;
}
- void push(int score, unsigned short* word, int wordLength) {
- SuggestedWord* sw = 0;
+ void push(int score, unsigned short *word, int wordLength, int type) {
+ SuggestedWord *sw = 0;
if (mSuggestions.size() >= MAX_WORDS) {
sw = mSuggestions.top();
const int minScore = sw->mScore;
@@ -68,9 +70,9 @@ class WordsPriorityQueue {
}
}
if (sw == 0) {
- sw = getFreeSuggestedWord(score, word, wordLength);
+ sw = getFreeSuggestedWord(score, word, wordLength, type);
} else {
- sw->setParams(score, word, wordLength);
+ sw->setParams(score, word, wordLength, type);
}
if (sw == 0) {
AKLOGE("SuggestedWord is accidentally null.");
@@ -86,21 +88,21 @@ class WordsPriorityQueue {
}
}
- SuggestedWord* top() {
+ SuggestedWord *top() {
if (mSuggestions.empty()) return 0;
- SuggestedWord* sw = mSuggestions.top();
+ SuggestedWord *sw = mSuggestions.top();
return sw;
}
- int outputSuggestions(const unsigned short* before, const int beforeLength,
- int *frequencies, unsigned short *outputChars) {
+ int outputSuggestions(const unsigned short *before, const int beforeLength,
+ int *frequencies, unsigned short *outputChars, int* outputTypes) {
mHighestSuggestedWord = 0;
const unsigned int size = min(
MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
- SuggestedWord* swBuffer[size];
+ SuggestedWord *swBuffer[size];
int index = size - 1;
while (!mSuggestions.empty() && index >= 0) {
- SuggestedWord* sw = mSuggestions.top();
+ SuggestedWord *sw = mSuggestions.top();
if (DEBUG_WORDS_PRIORITY_QUEUE) {
AKLOGI("dump word. %d", sw->mScore);
DUMP_WORD(sw->mWord, sw->mWordLength);
@@ -110,11 +112,11 @@ class WordsPriorityQueue {
--index;
}
if (size >= 2) {
- SuggestedWord* nsMaxSw = 0;
+ SuggestedWord *nsMaxSw = 0;
unsigned int maxIndex = 0;
float maxNs = 0;
for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord* tempSw = swBuffer[i];
+ SuggestedWord *tempSw = swBuffer[i];
if (!tempSw) {
continue;
}
@@ -126,22 +128,23 @@ class WordsPriorityQueue {
}
}
if (maxIndex > 0 && nsMaxSw) {
- memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(SuggestedWord*));
+ memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(SuggestedWord *));
swBuffer[0] = nsMaxSw;
}
}
for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord* sw = swBuffer[i];
+ SuggestedWord *sw = swBuffer[i];
if (!sw) {
AKLOGE("SuggestedWord is null %d", i);
continue;
}
const unsigned int wordLength = sw->mWordLength;
- char* targetAdr = (char*) outputChars + i * MAX_WORD_LENGTH * sizeof(short);
+ unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
frequencies[i] = sw->mScore;
- memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short));
+ outputTypes[i] = sw->mType;
+ memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
if (wordLength < MAX_WORD_LENGTH) {
- ((unsigned short*) targetAdr)[wordLength] = 0;
+ targetAddress[wordLength] = 0;
}
sw->mUsed = false;
}
@@ -155,7 +158,7 @@ class WordsPriorityQueue {
void clear() {
mHighestSuggestedWord = 0;
while (!mSuggestions.empty()) {
- SuggestedWord* sw = mSuggestions.top();
+ SuggestedWord *sw = mSuggestions.top();
if (DEBUG_WORDS_PRIORITY_QUEUE) {
AKLOGI("Clear word. %d", sw->mScore);
DUMP_WORD(sw->mWord, sw->mWordLength);
@@ -172,8 +175,8 @@ class WordsPriorityQueue {
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
}
- float getHighestNormalizedScore(const unsigned short* before, const int beforeLength,
- unsigned short** outWord, int *outScore, int *outLength) {
+ float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
+ unsigned short **outWord, int *outScore, int *outLength) {
if (!mHighestSuggestedWord) {
return 0.0;
}
@@ -182,27 +185,28 @@ class WordsPriorityQueue {
}
private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue);
struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
return left->mScore > right->mScore;
}
};
- SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
- int wordLength) {
+ SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
+ int wordLength, int type) {
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
if (!mSuggestedWords[i].mUsed) {
- mSuggestedWords[i].setParams(score, word, wordLength);
+ mSuggestedWords[i].setParams(score, word, wordLength, type);
return &mSuggestedWords[i];
}
}
return 0;
}
- static float getNormalizedScore(SuggestedWord* sw, const unsigned short* before,
- const int beforeLength, unsigned short** outWord, int *outScore, int *outLength) {
+ static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
+ const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
const int score = sw->mScore;
- unsigned short* word = sw->mWord;
+ unsigned short *word = sw->mWord;
const int wordLength = sw->mWordLength;
if (outScore) {
*outScore = score;
@@ -217,14 +221,13 @@ class WordsPriorityQueue {
before, beforeLength, word, wordLength, score);
}
- typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
+ typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
wordComparator> Suggestions;
Suggestions mSuggestions;
const unsigned int MAX_WORDS;
const unsigned int MAX_WORD_LENGTH;
- SuggestedWord* mSuggestedWords;
- SuggestedWord* mHighestSuggestedWord;
+ SuggestedWord *mSuggestedWords;
+ SuggestedWord *mHighestSuggestedWord;
};
-}
-
+} // namespace latinime
#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h
index 210b5a848..2d52903e0 100644
--- a/native/jni/src/words_priority_queue_pool.h
+++ b/native/jni/src/words_priority_queue_pool.h
@@ -17,20 +17,20 @@
#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
-#include <assert.h>
-#include <new>
+#include <cassert>
#include "words_priority_queue.h"
namespace latinime {
class WordsPriorityQueuePool {
public:
- WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
- // Note: using placement new() requires the caller to call the destructor explicitly.
- mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
+ WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength)
+ // Note: using placement new() requires the caller to call the destructor explicitly.
+ : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue(
+ mainQueueMaxWords, maxWordLength)) {
for (int i = 0, subQueueBufOffset = 0;
i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
- ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
+ ++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
WordsPriorityQueue(subQueueMaxWords, maxWordLength);
}
@@ -44,11 +44,11 @@ class WordsPriorityQueuePool {
}
}
- WordsPriorityQueue* getMasterQueue() {
+ WordsPriorityQueue *getMasterQueue() {
return mMasterQueue;
}
- WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) {
+ WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) {
if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
return 0;
}
@@ -70,7 +70,7 @@ class WordsPriorityQueuePool {
inline void clearSubQueue(const int wordIndex) {
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- WordsPriorityQueue* queue = getSubQueue(wordIndex, i);
+ WordsPriorityQueue *queue = getSubQueue(wordIndex, i);
if (queue) {
queue->clear();
}
@@ -85,12 +85,12 @@ class WordsPriorityQueuePool {
}
private:
- WordsPriorityQueue* mMasterQueue;
- WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
+ DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueuePool);
char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
- char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
- * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
+ * sizeof(WordsPriorityQueue)];
+ WordsPriorityQueue *mMasterQueue;
+ WordsPriorityQueue *mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
};
-}
-
+} // namespace latinime
#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H