aboutsummaryrefslogtreecommitdiffstats
path: root/native/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/src')
-rw-r--r--native/src/basechars.cpp (renamed from native/src/basechars.h)12
-rw-r--r--native/src/bigram_dictionary.h4
-rw-r--r--native/src/binary_format.h18
-rw-r--r--native/src/char_utils.h39
-rw-r--r--native/src/correction.cpp211
-rw-r--r--native/src/correction.h35
-rw-r--r--native/src/defines.h10
-rw-r--r--native/src/dictionary.cpp5
-rw-r--r--native/src/dictionary.h27
-rw-r--r--native/src/proximity_info.cpp18
-rw-r--r--native/src/proximity_info.h6
-rw-r--r--native/src/terminal_attributes.h78
-rw-r--r--native/src/unigram_dictionary.cpp300
-rw-r--r--native/src/unigram_dictionary.h87
-rw-r--r--native/src/words_priority_queue.h157
-rw-r--r--native/src/words_priority_queue_pool.h54
16 files changed, 755 insertions, 306 deletions
diff --git a/native/src/basechars.h b/native/src/basechars.cpp
index 3843e11c5..31f1e18a8 100644
--- a/native/src/basechars.h
+++ b/native/src/basechars.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#ifndef LATINIME_BASECHARS_H
-#define LATINIME_BASECHARS_H
+#include "char_utils.h"
+
+namespace latinime {
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
@@ -23,7 +24,7 @@
* if c is not a combined character, or the base character if it
* is combined.
*/
-static unsigned short BASE_CHARS[] = {
+const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
@@ -189,4 +190,5 @@ static unsigned short BASE_CHARS[] = {
// generated with:
// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-#endif // LATINIME_BASECHARS_H
+
+} // namespace latinime
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h
index c07458a38..585a1866a 100644
--- a/native/src/bigram_dictionary.h
+++ b/native/src/bigram_dictionary.h
@@ -21,14 +21,14 @@ namespace latinime {
class Dictionary;
class BigramDictionary {
-public:
+ public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives,
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
~BigramDictionary();
-private:
+ private:
bool addWordBigram(unsigned short *word, int length, int frequency);
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 6f65088db..9944fa2bd 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -22,12 +22,12 @@
namespace latinime {
class BinaryFormat {
-private:
+ private:
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-public:
+ public:
const static int UNKNOWN_FORMAT = -1;
const static int FORMAT_VERSION_1 = 1;
const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
@@ -145,15 +145,15 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
const int pos) {
- // This function skips all attributes. The format makes provision for future extension
- // with other attributes (notably shortcuts) but for the time being, bigrams are the
- // only attributes that may be found in a character group, so we only look at bigrams
- // in this version.
+ // This function skips all attributes: shortcuts and bigrams.
+ int newPos = pos;
+ if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
+ newPos = skipAttributes(dict, newPos);
+ }
if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
- return skipAttributes(dict, pos);
- } else {
- return pos;
+ newPos = skipAttributes(dict, newPos);
}
+ return newPos;
}
inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
diff --git a/native/src/char_utils.h b/native/src/char_utils.h
index a69a35e7a..607dc5195 100644
--- a/native/src/char_utils.h
+++ b/native/src/char_utils.h
@@ -19,8 +19,47 @@
namespace latinime {
+inline static int isAsciiUpper(unsigned short c) {
+ return c >= 'A' && c <= 'Z';
+}
+
+inline static unsigned short toAsciiLower(unsigned short c) {
+ return c - 'A' + 'a';
+}
+
+inline static int isAscii(unsigned short c) {
+ return c <= 127;
+}
+
unsigned short latin_tolower(unsigned short c);
+/**
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. If c is in range, BASE_CHARS[c] == c
+ * if c is not a combined character, or the base character if it
+ * is combined.
+ */
+
+static const int BASE_CHARS_SIZE = 0x0500;
+extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+
+inline static unsigned short toBaseChar(unsigned short c) {
+ if (c < BASE_CHARS_SIZE) {
+ return BASE_CHARS[c];
+ }
+ return c;
+}
+
+inline static unsigned short toBaseLowerCase(unsigned short c) {
+ c = toBaseChar(c);
+ if (isAsciiUpper(c)) {
+ return toAsciiLower(c);
+ } else if (isAscii(c)) {
+ return c;
+ }
+ return latin_tolower(c);
+}
+
} // namespace latinime
#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 27dc40745..dc31bfae7 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,11 +16,13 @@
#include <assert.h>
#include <ctype.h>
+#include <math.h>
#include <stdio.h>
#include <string.h>
#define LOG_TAG "LatinIME: correction.cpp"
+#include "char_utils.h"
#include "correction.h"
#include "dictionary.h"
#include "proximity_info.h"
@@ -31,73 +33,49 @@ namespace latinime {
// edit distance funcitons //
/////////////////////////////
-#if 0 /* no longer used */
-inline static int editDistance(
- int* editDistanceTable, const unsigned short* input,
- const int inputLength, const unsigned short* output, const int outputLength) {
- // dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
- const int li = inputLength + 1;
- const int lo = outputLength + 1;
- for (int i = 0; i < li; ++i) {
- dp[lo * i] = i;
- }
- for (int i = 0; i < lo; ++i) {
- dp[i] = i;
- }
-
- for (int i = 0; i < li - 1; ++i) {
- for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
- const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
- min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
- && co == Dictionary::toBaseLowerCase(input[i - 1])) {
- dp[(i + 1) * lo + (j + 1)] = min(
- dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
- }
- }
+inline static void initEditDistance(int *editDistanceTable) {
+ for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+ editDistanceTable[i] = i;
}
+}
- if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", inputLength, outputLength);
- for (int i = 0; i < li; ++i) {
- for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
+ const int outputLength) {
+ if (DEBUG_DICT) {
+ LOGI("EditDistanceTable");
+ for (int i = 0; i <= 10; ++i) {
+ int c[11];
+ for (int j = 0; j <= 10; ++j) {
+ if (j < inputLength + 1 && i < outputLength + 1) {
+ c[j] = (editDistanceTable + i * (inputLength + 1))[j];
+ } else {
+ c[j] = -1;
+ }
}
+ LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
- return dp[li * lo - 1];
-}
-#endif
-
-inline static void initEditDistance(int *editDistanceTable) {
- for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
- editDistanceTable[i] = i;
- }
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) {
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
int *const current = editDistanceTable + outputLength * (inputLength + 1);
const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
current[0] = outputLength;
- const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO =
- outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0;
+ const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
+ const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputLength; ++i) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]);
+ const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO
- && co == Dictionary::toBaseLowerCase(input[i - 2])) {
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
current[i] = min(current[i], prevprev[i - 2] + 1);
}
}
@@ -105,6 +83,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
+ if (DEBUG_DICT) {
+ LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ }
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
}
@@ -133,6 +114,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ // TODO: This is not supposed to be required. Check what's going wrong with
+ // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+ initEditDistance(mEditDistanceTable);
}
void Correction::initCorrectionState(
@@ -146,7 +130,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
- const bool useFullEditDistance) {
+ const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
// TODO: remove
mTransposedPos = transposedPos;
mExcessivePos = excessivePos;
@@ -159,6 +143,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance;
+ mDoAutoCompletion = doAutoCompletion;
+ mMaxErrors = maxErrors;
}
void Correction::checkState() {
@@ -280,7 +266,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const {
// TODO: use edit distance here
- return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
+ return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
+ // Allow one char longer word for missing character
+ || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength));
}
void Correction::addCharToCurrentWord(const int32_t c) {
@@ -312,12 +300,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+ if (correctionCount > mMaxErrors) {
+ return UNRELATED;
+ }
+
// TODO: Change the limit if we'll allow two or more corrections
const bool noCorrectionsHappenedSoFar = correctionCount == 0;
const bool canTryCorrection = noCorrectionsHappenedSoFar;
int proximityIndex = 0;
mDistances[mOutputIndex] = NOT_A_DISTANCE;
+ // Skip checking this node
if (mNeedsToTraverseAllNodes || isQuote(c)) {
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -342,6 +335,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processSkipChar(c, isTerminal, incremented);
}
+ // Check possible corrections.
if (mExcessivePos >= 0) {
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
@@ -392,7 +386,12 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
// TODO: Change the limit if we'll allow two or more proximity chars with corrections
- const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0;
+ // Work around: When the mMaxErrors is 1, we only allow just one error
+ // including proximity correction.
+ const bool checkProximityChars = (mMaxErrors > 1)
+ ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+ : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
? ProximityInfo::EQUIVALENT_CHAR
: mProximityInfo->getMatchedProximityId(
@@ -607,13 +606,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
}
inline static bool isUpperCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (isupper(c)) {
- return true;
- }
- return false;
+ return isAsciiUpper(toBaseChar(c));
}
//////////////////////
@@ -654,6 +647,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
+ if (DEBUG_DICT_FULL) {
+ dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
+ }
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
@@ -938,4 +934,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
+/* Damerau-Levenshtein distance */
+inline static int editDistanceInternal(
+ int* editDistanceTable, const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+ int* dp = editDistanceTable;
+ const int li = beforeLength + 1;
+ const int lo = afterLength + 1;
+ for (int i = 0; i < li; ++i) {
+ dp[lo * i] = i;
+ }
+ for (int i = 0; i < lo; ++i) {
+ dp[i] = i;
+ }
+
+ for (int i = 0; i < li - 1; ++i) {
+ for (int j = 0; j < lo - 1; ++j) {
+ const uint32_t ci = toBaseLowerCase(before[i]);
+ const uint32_t co = toBaseLowerCase(after[j]);
+ const uint16_t cost = (ci == co) ? 0 : 1;
+ dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+ min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+ if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+ && co == toBaseLowerCase(before[i - 1])) {
+ dp[(i + 1) * lo + (j + 1)] = min(
+ dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+ }
+ }
+ }
+
+ if (DEBUG_EDIT_DISTANCE) {
+ LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ for (int i = 0; i < li; ++i) {
+ for (int j = 0; j < lo; ++j) {
+ LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ }
+ }
+ }
+ return dp[li * lo - 1];
+}
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ int table[(beforeLength + 1) * (afterLength + 1)];
+ return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, getSuggestion() method,
+// suggestion scores are computed using the below formula.
+// original score
+// := pow(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+// - If the dictionary word is matched up to the point of the user entry
+// (full match up to min(before.length(), after.length())
+// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
+// - If the word is a true full match except for differences in accents or
+// capitalization, then treat it as if the score was 255.
+// - If before.length() == after.length()
+// => multiply by mFullWordMultiplier (this is defined 2))
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength,
+ const int score) {
+ if (0 == beforeLength || 0 == afterLength) {
+ return 0;
+ }
+ const int distance = editDistance(before, beforeLength, after, afterLength);
+ int spaceCount = 0;
+ for (int i = 0; i < afterLength; ++i) {
+ if (after[i] == CODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+
+ if (spaceCount == afterLength) {
+ return 0;
+ }
+
+ const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+ * pow((double)TYPED_LETTER_MULTIPLIER,
+ (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+ // add a weight based on edit distance.
+ // distance <= max(afterLength, beforeLength) == afterLength,
+ // so, 0 <= distance / afterLength <= 1
+ const double weight = 1.0 - (double) distance / afterLength;
+ return (score / maxScore) * weight;
+}
+
} // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
index d4e99f0ce..4012e7e82 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -27,8 +27,7 @@ namespace latinime {
class ProximityInfo;
class Correction {
-
-public:
+ public:
typedef enum {
TRAVERSE_ALL_ON_TERMINAL,
TRAVERSE_ALL_NOT_ON_TERMINAL,
@@ -44,7 +43,8 @@ public:
// TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
- const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
+ const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
+ const bool doAutoCompletion, const int maxErrors);
void checkState();
bool initProcessState(const int index);
@@ -94,7 +94,25 @@ public:
inline int getTreeParentIndex(const int index) const {
return mCorrectionStates[index].mParentIndex;
}
-private:
+
+ class RankingAlgorithm {
+ public:
+ static int calculateFinalFreq(const int inputIndex, const int depth,
+ const int freq, int *editDistanceTable, const Correction* correction);
+ static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
+ static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
+ const unsigned short* after, const int afterLength, const int score);
+ static int editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength);
+ private:
+ static const int CODE_SPACE = ' ';
+ static const int MAX_INITIAL_SCORE = 255;
+ static const int TYPED_LETTER_MULTIPLIER = 2;
+ static const int FULL_WORD_MULTIPLIER = 2;
+ };
+
+ private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes();
@@ -109,6 +127,7 @@ private:
const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance;
+ bool mDoAutoCompletion;
int mMaxEditDistance;
int mMaxDepth;
int mInputLength;
@@ -116,6 +135,7 @@ private:
int mMissingSpacePos;
int mTerminalInputIndex;
int mTerminalOutputIndex;
+ int mMaxErrors;
// The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
@@ -150,13 +170,6 @@ private:
bool mTransposing;
bool mSkipping;
- class RankingAlgorithm {
- public:
- static int calculateFinalFreq(const int inputIndex, const int depth,
- const int freq, int *editDistanceTable, const Correction* correction);
- static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
- const Correction* correction, const unsigned short *word);
- };
};
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/src/defines.h b/native/src/defines.h
index ef1beb92f..d636e5197 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -98,9 +98,10 @@ static void prof_out(void) {
#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL
-#define DEBUG_PROXIMITY_INFO true
+#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true
+#define DEBUG_WORDS_PRIORITY_QUEUE true
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
@@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DUMP_WORD(word, length)
@@ -196,10 +198,14 @@ static void dumpWord(const unsigned short* word, const int length) {
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
-// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
+// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
#define MAX_WORD_LENGTH_INTERNAL 48
+// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
+// for better performance.
+#define SUB_QUEUE_MAX_WORDS 5
+
#define MAX_DEPTH_MULTIPLIER 3
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index a49769bdb..e3673d425 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -38,6 +38,9 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
}
}
+ mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
+ mWordsPriorityQueuePool = new WordsPriorityQueuePool(
+ maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
@@ -45,6 +48,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
}
Dictionary::~Dictionary() {
+ delete mCorrection;
+ delete mWordsPriorityQueuePool;
delete mUnigramDictionary;
delete mBigramDictionary;
}
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index d5de0083a..79d377a4f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -17,22 +17,25 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
-#include "basechars.h"
#include "bigram_dictionary.h"
#include "char_utils.h"
+#include "correction.h"
#include "defines.h"
#include "proximity_info.h"
#include "unigram_dictionary.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
class Dictionary {
-public:
+ public:
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
+
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) {
- return mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
+ mCorrection, xcoordinates, ycoordinates, codes,
codesSize, flags, outWords, frequencies);
}
@@ -63,9 +66,8 @@ public:
static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
const int pos, unsigned short *c, int *childrenPosition,
bool *terminal, int *freq);
- static inline unsigned short toBaseLowerCase(unsigned short c);
-private:
+ private:
bool hasBigram();
const unsigned char *mDict;
@@ -79,6 +81,8 @@ private:
const bool IS_LATEST_DICT_VERSION;
UnigramDictionary *mUnigramDictionary;
BigramDictionary *mBigramDictionary;
+ WordsPriorityQueuePool *mWordsPriorityQueuePool;
+ Correction *mCorrection;
};
// public static utility methods
@@ -156,19 +160,6 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict,
return position;
}
-
-inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (c >='A' && c <= 'Z') {
- c |= 32;
- } else if (c > 127) {
- c = latin_tolower(c);
- }
- return c;
-}
-
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index 763a3a174..95e35263b 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -47,7 +47,7 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
&& keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
&& sweetSpotCenterYs && sweetSpotRadii),
- mInputXCoordinates(NULL), mInputYCoordinates(NULL),
+ mInputXCoordinates(0), mInputYCoordinates(0),
mTouchPositionCorrectionEnabled(false) {
const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
mProximityCharsArray = new uint32_t[proximityGridLength];
@@ -100,9 +100,17 @@ inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y)
}
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
+ if (x < 0 || y < 0) {
+ if (DEBUG_DICT) {
+ LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
+ assert(false);
+ }
+ return false;
+ }
+
const int startIndex = getStartIndexFromCoordinates(x, y);
if (DEBUG_PROXIMITY_INFO) {
- LOGI("hasSpaceProximity: index %d", startIndex);
+ LOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
@@ -167,7 +175,7 @@ int ProximityInfo::getKeyIndex(const int c) const {
// We do not have the coordinate data
return NOT_A_INDEX;
}
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
if (baseLowerC > MAX_CHAR_CODE) {
return NOT_A_INDEX;
}
@@ -232,7 +240,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
const int *currentChars = getProximityCharsAt(index);
const int firstChar = currentChars[0];
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
// The first char in the array is what user typed. If it matches right away,
// that means the user typed that same char for this pos.
@@ -245,7 +253,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
// If the non-accented, lowercased version of that first character matches c,
// then we have a non-accented version of the accented character the user
// typed. Treat it as a close char.
- if (Dictionary::toBaseLowerCase(firstChar) == baseLowerC)
+ if (toBaseLowerCase(firstChar) == baseLowerC)
return NEAR_PROXIMITY_CHAR;
// Not an exact nor an accent-alike match: search the list of close keys
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 35e354c6e..9ca5505a7 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -26,7 +26,7 @@ namespace latinime {
class Correction;
class ProximityInfo {
-public:
+ public:
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
@@ -56,7 +56,7 @@ public:
bool existsCharInProximityAt(const int index, const int c) const;
bool existsAdjacentProximityChars(const int index) const;
ProximityType getMatchedProximityId(const int index, const unsigned short c,
- const bool checkProximityChars, int *proximityIndex = NULL) const;
+ const bool checkProximityChars, int *proximityIndex = 0) const;
int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
}
@@ -68,7 +68,7 @@ public:
return mTouchPositionCorrectionEnabled;
}
-private:
+ private:
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h
new file mode 100644
index 000000000..1f9815936
--- /dev/null
+++ b/native/src/terminal_attributes.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
+#define LATINIME_TERMINAL_ATTRIBUTES_H
+
+#include "unigram_dictionary.h"
+
+namespace latinime {
+
+/**
+ * This class encapsulates information about a terminal that allows to
+ * retrieve local node attributes like the list of shortcuts without
+ * exposing the format structure to the client.
+ */
+class TerminalAttributes {
+ public:
+ class ShortcutIterator {
+ const uint8_t* const mDict;
+ bool mHasNextShortcutTarget;
+ int mPos;
+
+ public:
+ ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
+ mPos(pos) {
+ mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ }
+
+ inline bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // see BinaryFormat::getWordAtAddress.
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
+ mHasNextShortcutTarget =
+ 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ int shortcutAddress =
+ BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
+ return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
+ }
+ };
+
+ private:
+ const uint8_t* const mDict;
+ const uint8_t mFlags;
+ const int mStartPos;
+
+ public:
+ TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
+ mDict(dict), mFlags(flags), mStartPos(pos) {
+ }
+
+ inline bool isShortcutOnly() const {
+ return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ inline ShortcutIterator getShortcutIterator() const {
+ return ShortcutIterator(mDict, mStartPos, mFlags);
+ }
+};
+} // namespace latinime
+
+#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8eb5a9700..e95e03ce5 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -48,19 +49,23 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
if (DEBUG_DICT) {
LOGI("UnigramDictionary - constructor");
}
- mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
}
UnigramDictionary::~UnigramDictionary() {
- delete mCorrection;
}
-static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
+static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize,
const int MAX_PROXIMITY_CHARS) {
return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;
}
-bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {
+// TODO: This needs to take an const unsigned short* and not tinker with its contents
+static inline void addWord(
+ unsigned short *word, int length, int frequency, WordsPriorityQueue *queue) {
+ queue->push(frequency, word, length);
+}
+
+bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize) const {
// There can't be a digraph if we don't have at least 2 characters to examine
if (i + 2 > codesSize) return false;
@@ -86,9 +91,10 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
// codesRemain is the remaining size in codesSrc.
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
+ const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
+ const int codesBufferSize, const int flags, const int *codesSrc,
+ const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
@@ -105,8 +111,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, codesBufferSize, flags,
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
- currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,
- frequencies);
+ currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction,
+ queuePool);
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
@@ -114,9 +120,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,
BYTES_IN_ONE_CHAR);
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
- codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,
- codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,
- outWords, frequencies);
+ codesBuffer, codesBufferSize, flags,
+ codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1,
+ codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool);
return;
}
}
@@ -132,40 +138,39 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest, codesSrc, remainingBytes);
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies,
- flags);
+ (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction,
+ queuePool);
}
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
+ WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies) {
+ Correction* masterCorrection = correction;
if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
{ // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);
+ codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool);
} else { // Normal processing
- getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
- outWords, frequencies, flags);
+ getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags,
+ masterCorrection, queuePool);
}
PROF_START(20);
- // Get the word count
- int suggestedWordsCount = 0;
- while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
- suggestedWordsCount++;
- }
+ const int suggestedWordsCount =
+ queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) {
LOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
- short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
+ short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- LOGI("%s %i", s, mFrequencies[j]);
+ LOGI("%s %i", s, frequencies[j]);
#endif
}
}
@@ -175,23 +180,19 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
}
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags) {
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const int inputLength, const int flags, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
PROF_OPEN;
PROF_START(0);
- initSuggestions(
- proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
- if (DEBUG_DICT) assert(codesSize == mInputLength);
-
- const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
- mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
+ // Note: This line is intentionally left blank
PROF_END(0);
- const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
- // TODO: remove
PROF_START(1);
- getSuggestionCandidates(useFullEditDistance);
+ const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
+ getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance,
+ inputLength, correction, queuePool);
PROF_END(1);
PROF_START(2);
@@ -209,12 +210,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(5);
// Suggestions with missing space
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
- && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
- for (int i = 1; i < codesSize; ++i) {
+ && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
+ for (int i = 1; i < inputLength; ++i) {
if (DEBUG_DICT) {
LOGI("--- Suggest missing space characters %d", i);
}
- getMissingSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
PROF_END(5);
@@ -222,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(6);
if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
// The first and last "mistyped spaces" are taken care of by excessive character handling
- for (int i = 1; i < codesSize - 1; ++i) {
+ for (int i = 1; i < inputLength - 1; ++i) {
if (DEBUG_DICT) {
LOGI("--- Suggest words with proximity space %d", i);
}
@@ -233,7 +235,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
i, x, y, proximityInfo->hasSpaceProximity(x, y));
}
if (proximityInfo->hasSpaceProximity(x, y)) {
- getMistypedSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
}
@@ -241,153 +244,118 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies) {
+ const int *yCoordinates, const int *codes, const int inputLength,
+ WordsPriorityQueue *queue, Correction *correction) {
if (DEBUG_DICT) {
LOGI("initSuggest");
}
- mFrequencies = frequencies;
- mOutputChars = outWords;
- mInputLength = codesSize;
- proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
- mProximityInfo = proximityInfo;
-}
-
-static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
- if (c < nextLettersSize) {
- nextLetters[c]++;
- }
-}
-
-// TODO: We need to optimize addWord by using STL or something
-// TODO: This needs to take an const unsigned short* and not tinker with its contents
-bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
- word[length] = 0;
- if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Found word = %s, freq = %d", s, frequency);
-#endif
- }
- if (length > MAX_WORD_LENGTH) {
- if (DEBUG_DICT) {
- LOGI("Exceeded max word length.");
- }
- return false;
- }
-
- // Find the right insertion point
- int insertAt = 0;
- while (insertAt < MAX_WORDS) {
- // TODO: How should we sort words with the same frequency?
- if (frequency > mFrequencies[insertAt]) {
- break;
- }
- insertAt++;
- }
- if (insertAt < MAX_WORDS) {
- if (DEBUG_DICT) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
-#endif
- }
- memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
- (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
- (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
- mFrequencies[insertAt] = frequency;
- memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
- (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
- (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
- unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
- while (length--) {
- *dest++ = *word++;
- }
- *dest = 0; // NULL terminate
- if (DEBUG_DICT) {
- LOGI("Added word at %d", insertAt);
- }
- return true;
+ proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
+ if (queue) {
+ queue->clear();
}
- return false;
+ const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+ correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
static const char QUOTE = '\'';
static const char SPACE = ' ';
-void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance) {
+void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ initSuggestions(
+ proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
+ getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+ true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
+}
+
+void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+ const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
- mCorrection->setCorrectionParams(0, 0, 0,
- -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance);
+ correction->setCorrectionParams(0, 0, 0,
+ -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
+ doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
int outputIndex = 0;
- mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0));
+ correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0));
// Depth first search
while (outputIndex >= 0) {
- if (mCorrection->initProcessState(outputIndex)) {
- int siblingPos = mCorrection->getTreeSiblingPos(outputIndex);
+ if (correction->initProcessState(outputIndex)) {
+ int siblingPos = correction->getTreeSiblingPos(outputIndex);
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- mCorrection, &childCount, &firstChildPos, &siblingPos);
+ correction, &childCount, &firstChildPos, &siblingPos, queue);
// Update next sibling pos
- mCorrection->setTreeSiblingPos(outputIndex, siblingPos);
+ correction->setTreeSiblingPos(outputIndex, siblingPos);
if (needsToTraverseChildrenNodes) {
// Goes to child node
- outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos);
+ outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos);
}
} else {
// Goes to parent sibling node
- outputIndex = mCorrection->getTreeParentIndex(outputIndex);
+ outputIndex = correction->getTreeParentIndex(outputIndex);
}
}
}
-void UnigramDictionary::getMissingSpaceWords(
+void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
+ correction, queuePool);
}
-void UnigramDictionary::getMistypedSpaceWords(
+void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
-}
-
-inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
- const int inputIndex, const int skipPos, const int depth) {
- const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex);
- // Skip the ' or other letter and continue deeper
- return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
+ correction, queuePool);
}
-inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueue *queue) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) {
- addWord(wordPointer, wordLength, finalFreq);
+ if (!terminalAttributes.isShortcutOnly()) {
+ addWord(wordPointer, wordLength, finalFreq, queue);
+ }
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // TODO: addWord only supports weak ordering, meaning we have no means to control the
+ // order of the shortcuts relative to one another or to the word. We need to either
+ // modulate the frequency of each shortcut according to its own shortcut frequency or
+ // to make the queue so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+ }
}
}
-void UnigramDictionary::getSplitTwoWordsSuggestion(
- const int inputLength, Correction* correction) {
- const int spaceProximityPos = correction->getSpaceProximityPos();
- const int missingSpacePos = correction->getMissingSpacePos();
+void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
@@ -408,9 +376,19 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
+
+
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
+ //correction);
+ //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
+ //MAX_ERRORS_FOR_TWO_WORDS);
+
// Allocating variable length array on stack
unsigned short word[newWordLength];
- const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);
+ const int firstFreq = getMostFrequentWordLike(
+ firstWordStartPos, firstWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
LOGI("First freq: %d", firstFreq);
}
@@ -420,7 +398,8 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i];
}
- const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);
+ const int secondFreq = getMostFrequentWordLike(
+ secondWordStartPos, secondWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
LOGI("Second freq: %d", secondFreq);
}
@@ -431,22 +410,29 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i - firstWordLength - 1];
}
- const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
+ 0 /* do not clear queue */, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
- addWord(word, newWordLength, pairFreq);
+ addWord(word, newWordLength, pairFreq, masterQueue);
return;
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputLength, unsigned short *word) {
+ const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) {
uint16_t inWord[inputLength];
for (int i = 0; i < inputLength; ++i) {
- inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i);
+ inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputLength, word);
}
@@ -470,8 +456,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- int32_t baseChar = Dictionary::toBaseLowerCase(character);
- const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]);
+ int32_t baseChar = toBaseLowerCase(character);
+ const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -483,8 +469,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) {
- baseChar = Dictionary::toBaseLowerCase(character);
- if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+ baseChar = toBaseLowerCase(character);
+ if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex;
return false;
@@ -597,7 +583,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition) {
+ int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -648,7 +634,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true;
- } else if (stateType == Correction::UNRELATED) {
+ } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
// We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining
@@ -676,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, mCorrection);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags,
+ BinaryFormat::skipFrequency(flags, pos));
+ onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index ef9709a89..23581425a 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -22,17 +22,14 @@
#include "correction_state.h"
#include "defines.h"
#include "proximity_info.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
+#include "words_priority_queue.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
+class TerminalAttributes;
class UnigramDictionary {
-
-public:
-
+ public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -46,8 +43,14 @@ public:
// Flag for terminal groups
static const int FLAG_IS_TERMINAL = 0x10;
+ // Flag for shortcut targets presence
+ static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence
static const int FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for shortcut-only words. Some words are shortcut-only, which means they match when
+ // the user types them but they don't pop in the suggestion strip, only the words they are
+ // shortcuts for do.
+ static const int FLAG_IS_SHORTCUT_ONLY = 0x02;
// Attribute (bigram/shortcut) related flags:
// Flag for presence of more attributes
@@ -64,47 +67,63 @@ public:
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+ // Error tolerances
+ static const int DEFAULT_MAX_ERRORS = 2;
+ static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
+
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
const bool isLatestDictVersion);
bool isValidWord(const uint16_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
- int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
+ Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies);
virtual ~UnigramDictionary();
-private:
-
+ private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags);
- bool isDigraph(const int* codes, const int i, const int codesSize) const;
+ const int *ycoordinates, const int *codes, const int inputLength,
+ const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
+ bool isDigraph(const int *codes, const int i, const int codesSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
+ const int codesBufferSize, const int flags, const int* codesSrc,
+ const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies);
- void getSuggestionCandidates(const bool useFullEditDistance);
- bool addWord(unsigned short *word, int length, int frequency);
- void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
- void getMissingSpaceWords(const int inputLength, const int missingSpacePos,
- Correction *correction, const bool useFullEditDistance);
- void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos,
- Correction *correction, const bool useFullEditDistance);
- void onTerminal(const int freq, Correction *correction);
+ WordsPriorityQueue *queue, Correction *correction);
+ void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSuggestionCandidates(
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+ void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int missingSpacePos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int spaceProximityPos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ Correction *correction, WordsPriorityQueue *queue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
- bool processCurrentNode(const int initialPos,
- Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition);
+ bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
- unsigned short *word);
+ ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
- short unsigned int* outWord);
+ short unsigned int *outWord);
const uint8_t* const DICT_ROOT;
const int MAX_WORD_LENGTH;
@@ -127,14 +146,8 @@ private:
};
static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
- int *mFrequencies;
- unsigned short *mOutputChars;
- ProximityInfo *mProximityInfo;
- Correction *mCorrection;
- int mInputLength;
- // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
-
+ // Still bundled members
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
new file mode 100644
index 000000000..84f2523c2
--- /dev/null
+++ b/native/src/words_priority_queue.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_H
+
+#include <iostream>
+#include <queue>
+#include "defines.h"
+
+namespace latinime {
+
+class WordsPriorityQueue {
+ public:
+ class SuggestedWord {
+ public:
+ int mScore;
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWordLength;
+ bool mUsed;
+
+ void setParams(int score, unsigned short* word, int wordLength) {
+ mScore = score;
+ mWordLength = wordLength;
+ memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+ mUsed = true;
+ }
+ };
+
+ WordsPriorityQueue(int maxWords, int maxWordLength) :
+ MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH(
+ (unsigned int) maxWordLength) {
+ mSuggestedWords = new SuggestedWord[maxWordLength];
+ for (int i = 0; i < maxWordLength; ++i) {
+ mSuggestedWords[i].mUsed = false;
+ }
+ }
+ ~WordsPriorityQueue() {
+ delete[] mSuggestedWords;
+ }
+
+ void push(int score, unsigned short* word, int wordLength) {
+ SuggestedWord* sw = 0;
+ if (mSuggestions.size() >= MAX_WORDS) {
+ sw = mSuggestions.top();
+ const int minScore = sw->mScore;
+ if (minScore >= score) {
+ return;
+ } else {
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+ if (sw == 0) {
+ sw = getFreeSuggestedWord(score, word, wordLength);
+ } else {
+ sw->setParams(score, word, wordLength);
+ }
+ if (sw == 0) {
+ LOGE("SuggestedWord is accidentally null.");
+ return;
+ }
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("Push word. %d, %d", score, wordLength);
+ DUMP_WORD(word, wordLength);
+ }
+ mSuggestions.push(sw);
+ }
+
+ SuggestedWord* topAndPop() {
+ if (mSuggestions.empty()) return 0;
+ SuggestedWord* sw = mSuggestions.top();
+ mSuggestions.pop();
+ return sw;
+ }
+
+ int outputSuggestions(int *frequencies, unsigned short *outputChars) {
+ const unsigned int size = min(MAX_WORDS, mSuggestions.size());
+ int index = size - 1;
+ while (!mSuggestions.empty() && index >= 0) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("dump word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ const unsigned int wordLength = sw->mWordLength;
+ char* targetAdr = (char*) outputChars
+ + (index) * MAX_WORD_LENGTH * sizeof(short);
+ frequencies[index] = sw->mScore;
+ memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short));
+ if (wordLength < MAX_WORD_LENGTH) {
+ ((unsigned short*) targetAdr)[wordLength] = 0;
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ --index;
+ }
+ return size;
+ }
+
+ int size() {
+ return mSuggestions.size();
+ }
+
+ void clear() {
+ while (!mSuggestions.empty()) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("Clear word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+
+ private:
+ struct wordComparator {
+ bool operator ()(SuggestedWord * left, SuggestedWord * right) {
+ return left->mScore > right->mScore;
+ }
+ };
+
+ SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
+ int wordLength) {
+ for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
+ if (!mSuggestedWords[i].mUsed) {
+ mSuggestedWords[i].setParams(score, word, wordLength);
+ return &mSuggestedWords[i];
+ }
+ }
+ return 0;
+ }
+
+ typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
+ wordComparator> Suggestions;
+ Suggestions mSuggestions;
+ const unsigned int MAX_WORDS;
+ const unsigned int MAX_WORD_LENGTH;
+ SuggestedWord* mSuggestedWords;
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
new file mode 100644
index 000000000..386297650
--- /dev/null
+++ b/native/src/words_priority_queue_pool.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+
+#include "words_priority_queue.h"
+
+namespace latinime {
+
+class WordsPriorityQueuePool {
+ public:
+ WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
+ mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
+ mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ }
+
+ ~WordsPriorityQueuePool() {
+ delete mMasterQueue;
+ }
+
+ WordsPriorityQueue* getMasterQueue() {
+ return mMasterQueue;
+ }
+ // TODO: Come up with more generic pool
+ WordsPriorityQueue* getSubQueue1() {
+ return mSubQueue1;
+ }
+ WordsPriorityQueue* getSubQueue2() {
+ return mSubQueue2;
+ }
+
+ private:
+ WordsPriorityQueue *mMasterQueue;
+ WordsPriorityQueue *mSubQueue1;
+ WordsPriorityQueue *mSubQueue2;
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H