aboutsummaryrefslogtreecommitdiffstats
path: root/native/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/src')
-rw-r--r--native/src/bigram_dictionary.cpp10
-rw-r--r--native/src/binary_format.h4
-rw-r--r--native/src/correction.cpp191
-rw-r--r--native/src/correction.h5
-rw-r--r--native/src/debug.h6
-rw-r--r--native/src/defines.h57
-rw-r--r--native/src/dictionary.cpp4
-rw-r--r--native/src/dictionary.h75
-rw-r--r--native/src/proximity_info.cpp8
-rw-r--r--native/src/unigram_dictionary.cpp188
-rw-r--r--native/src/unigram_dictionary.h13
-rw-r--r--native/src/words_priority_queue.h21
-rw-r--r--native/src/words_priority_queue_pool.h56
13 files changed, 409 insertions, 229 deletions
diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp
index c340c6c1a..db7734bc7 100644
--- a/native/src/bigram_dictionary.cpp
+++ b/native/src/bigram_dictionary.cpp
@@ -32,8 +32,8 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
if (DEBUG_DICT) {
- LOGI("BigramDictionary - constructor");
- LOGI("Has Bigram : %d", hasBigram);
+ AKLOGI("BigramDictionary - constructor");
+ AKLOGI("Has Bigram : %d", hasBigram);
}
}
@@ -46,7 +46,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
#ifdef FLAG_DBG
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
+ AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif
}
@@ -60,7 +60,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
insertAt++;
}
if (DEBUG_DICT) {
- LOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
+ AKLOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
}
if (insertAt < mMaxBigrams) {
memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
@@ -76,7 +76,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
}
*dest = 0; // NULL terminate
if (DEBUG_DICT) {
- LOGI("Bigram: Added word at %d", insertAt);
+ AKLOGI("Bigram: Added word at %d", insertAt);
}
return true;
}
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 9944fa2bd..1d74998f6 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -61,7 +61,9 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
}
inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
- return dict[(*pos)++];
+ const int msb = dict[(*pos)++];
+ if (msb < 0x80) return msb;
+ return ((msb & 0x7F) << 8) | dict[(*pos)++];
}
inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index dc31bfae7..6a129d4e3 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -42,7 +42,7 @@ inline static void initEditDistance(int *editDistanceTable) {
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
const int outputLength) {
if (DEBUG_DICT) {
- LOGI("EditDistanceTable");
+ AKLOGI("EditDistanceTable");
for (int i = 0; i <= 10; ++i) {
int c[11];
for (int j = 0; j <= 10; ++j) {
@@ -52,7 +52,7 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int
c[j] = -1;
}
}
- LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
@@ -83,8 +83,8 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
- if (DEBUG_DICT) {
- LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
}
@@ -168,8 +168,8 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
- return -1;
+ if (outputIndex < MIN_SUGGEST_DEPTH) {
+ return NOT_A_FREQUENCY;
}
*word = mWord;
@@ -215,20 +215,10 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getOutputIndex() {
- return mOutputIndex;
-}
-
-// TODO: remove
int Correction::getInputIndex() {
return mInputIndex;
}
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
- return mNeedsToTraverseAllNodes;
-}
-
void Correction::incrementInputIndex() {
++mInputIndex;
}
@@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1);
}
-// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c);
- if (needsToTraverseAllNodes() && isTerminal) {
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+ mTerminalOutputIndex = mOutputIndex;
+ if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL;
} else {
@@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR;
}
@@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
// TODO: Change the limit if we'll allow two or more corrections
@@ -378,10 +374,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
--mTransposedCount;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
}
@@ -404,7 +400,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& isEquivalentChar(mProximityInfo->getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION) {
- LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
+ AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
}
// Conversion p->e
// Example:
@@ -481,10 +477,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else {
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
@@ -534,11 +530,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mTerminalOutputIndex = mOutputIndex - 1;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return ON_TERMINAL;
} else {
+ mTerminalInputIndex = mInputIndex - 1;
+ mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL;
}
}
@@ -655,9 +653,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
int finalFreq = freq;
// TODO: Optimize this.
- // TODO: Ignoring edit distance for transposed char, for now
- if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
- ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+ if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+ ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1)
+ - transposedCount;
+
const int matchWeight = powerIntCapped(typedLetterMultiplier,
max(inputLength, outputIndex + 1) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
@@ -669,21 +668,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
ed = max(0, ed - quoteDiffCount);
- if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
- // Promote a word with just one skipped or excessive char
- if (sameLength) {
- multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
- } else {
+ if (transposedCount < 1) {
+ if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
+ // Promote a word with just one skipped or excessive char
+ if (sameLength) {
+ multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
+ } else {
+ multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ }
+ } else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ sameLength = true;
}
- } else if (ed == 0) {
- multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- sameLength = true;
}
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
proximityMatchedCount);
} else {
- // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
}
@@ -703,7 +703,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
/ (10 * inputLength
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
- LOGI("Demotion rate for missing character is %d.", demotionRate);
+ AKLOGI("Demotion rate for missing character is %d.", demotionRate);
}
multiplyRate(demotionRate, &finalFreq);
}
@@ -717,7 +717,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_CORRECTION_FREQ) {
- LOGI("Double excessive demotion");
+ AKLOGI("Double excessive demotion");
}
// If an excessive character is not adjacent to the left char or the right char,
// we will demote this word.
@@ -767,7 +767,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
// A word with proximity corrections
if (DEBUG_DICT_FULL) {
- LOGI("Found a proximity correction.");
+ AKLOGI("Found a proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -828,12 +828,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
}
if (DEBUG_DICT_FULL) {
- LOGI("calc: %d, %d", outputIndex, sameLength);
+ AKLOGI("calc: %d, %d", outputIndex, sameLength);
}
if (DEBUG_CORRECTION_FREQ) {
DUMP_WORD(correction->mWord, outputIndex + 1);
- LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
+ AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
quoteDiffCount, ed, finalFreq);
}
@@ -874,7 +874,102 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
if (DEBUG_DICT_FULL) {
- LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
+ if (firstWordLength == 0 || secondWordLength == 0) {
+ return 0;
+ }
+ const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+ int tempFirstFreq = firstFreq;
+ multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+ const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+ int tempSecondFreq = secondFreq;
+ multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+ const int totalLength = firstWordLength + secondWordLength;
+
+ // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+ // length.
+ int totalFreq = tempFirstFreq + tempSecondFreq;
+
+ // This is a workaround to try offsetting the not-enough-demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+ // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+ // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+ const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+ multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+ // At this moment, totalFreq is calculated by the following formula:
+ // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+ // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+ // This is another workaround to offset the demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+ // the same amount because we already have adjusted the synthetic freq of this "missing or
+ // mistyped space" suggestion candidate above in this method.
+ const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+ multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ AKLOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
+ return totalFreq;
+}
+
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
+ const int spaceProximityPos = correction->mSpaceProximityPos;
+ const int missingSpacePos = correction->mMissingSpacePos;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int inputLength = correction->mInputLength;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
}
if (firstWordLength == 0 || secondWordLength == 0) {
@@ -919,7 +1014,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
if (isSpaceProximity) {
// A word pair with one space proximity correction
if (DEBUG_DICT) {
- LOGI("Found a word pair with space proximity correction.");
+ AKLOGI("Found a word pair with space proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &totalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
@@ -965,10 +1060,10 @@ inline static int editDistanceInternal(
}
if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
for (int i = 0; i < li; ++i) {
for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
}
}
}
diff --git a/native/src/correction.h b/native/src/correction.h
index 4012e7e82..22a424f5c 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -48,7 +48,6 @@ class Correction {
void checkState();
bool initProcessState(const int index);
- int getOutputIndex();
int getInputIndex();
virtual ~Correction();
@@ -101,6 +100,8 @@ class Correction {
const int freq, int *editDistanceTable, const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const Correction* correction, const unsigned short *word);
+ static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
const unsigned short* after, const int afterLength, const int score);
static int editDistance(const unsigned short* before,
@@ -115,11 +116,11 @@ class Correction {
private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
- inline bool needsToTraverseAllNodes();
inline void startToTraverseAllNodes();
inline bool isQuote(const unsigned short c);
inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+ inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c);
const int TYPED_LETTER_MULTIPLIER;
diff --git a/native/src/debug.h b/native/src/debug.h
index 38b2f107a..b13052c95 100644
--- a/native/src/debug.h
+++ b/native/src/debug.h
@@ -42,7 +42,7 @@ static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned s
static inline void LOGI_S16(unsigned short* string, const unsigned int length) {
unsigned char tmp_buffer[length];
convertToUnibyteString(string, tmp_buffer, length);
- LOGI(">> %s", tmp_buffer);
+ AKLOGI(">> %s", tmp_buffer);
// The log facility is throwing out log that comes too fast. The following
// is a dirty way of slowing down processing so that we can see all log.
// TODO : refactor this in a blocking log or something.
@@ -53,7 +53,7 @@ static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int leng
unsigned char c) {
unsigned char tmp_buffer[length+1];
convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c);
- LOGI(">> %s", tmp_buffer);
+ AKLOGI(">> %s", tmp_buffer);
// Likewise
// usleep(10);
}
@@ -64,7 +64,7 @@ static inline void printDebug(const char* tag, int* codes, int codesSize, int MA
buf[codesSize] = 0;
while (--codesSize >= 0)
buf[codesSize] = (unsigned char)codes[codesSize * MAX_PROXIMITY_CHARS];
- LOGI("%s, WORD = %s", tag, buf);
+ AKLOGI("%s, WORD = %s", tag, buf);
free(buf);
}
diff --git a/native/src/defines.h b/native/src/defines.h
index d636e5197..d739043a4 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -20,15 +20,32 @@
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <cutils/log.h>
+#define AKLOGE ALOGE
+#define AKLOGI ALOGI
+
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+ for (int i = 0; i < length; ++i) {
+ charBuf[i] = word[i];
+ }
+ charBuf[length] = 0;
+ AKLOGI("[ %s ]", charBuf);
+}
+
#else
-#define LOGE(fmt, ...)
-#define LOGI(fmt, ...)
+#define AKLOGE(fmt, ...)
+#define AKLOGI(fmt, ...)
+#define DUMP_WORD(word, length)
#endif
#ifdef FLAG_DO_PROFILE
// Profiler
#include <cutils/log.h>
#include <time.h>
+
#define PROF_BUF_SIZE 100
static double profile_buf[PROF_BUF_SIZE];
static double profile_old[PROF_BUF_SIZE];
@@ -42,8 +59,8 @@ static unsigned int profile_counter[PROF_BUF_SIZE];
#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0)
#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
#define PROF_CLOCKOUT(prof_buf_id) \
- LOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
-#define PROF_OUTALL do { LOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
+ AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
+#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
static void prof_reset(void) {
for (int i = 0; i < PROF_BUF_SIZE; ++i) {
@@ -55,9 +72,9 @@ static void prof_reset(void) {
static void prof_out(void) {
if (profile_counter[PROF_BUF_SIZE - 1] != 1) {
- LOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
+ AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
}
- LOGI("Total time is %6.3f ms.",
+ AKLOGI("Total time is %6.3f ms.",
profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double)CLOCKS_PER_SEC);
double all = 0;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
@@ -66,7 +83,7 @@ static void prof_out(void) {
if (all == 0) all = 1;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
if (profile_buf[i] != 0) {
- LOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
+ AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
i, (profile_buf[i] * 100 / all),
profile_buf[i] * 1000 / (double)CLOCKS_PER_SEC, profile_counter[i]);
}
@@ -100,20 +117,8 @@ static void prof_out(void) {
#define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
-#define DEBUG_CORRECTION_FREQ true
-#define DEBUG_WORDS_PRIORITY_QUEUE true
-
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-
-static char charBuf[50];
-
-static void dumpWord(const unsigned short* word, const int length) {
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
- }
- charBuf[length] = 0;
- LOGI("[ %s ]", charBuf);
-}
+#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
#else // FLAG_DBG
@@ -128,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
-#define DUMP_WORD(word, length)
#endif // FLAG_DBG
@@ -168,6 +172,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
#define NOT_A_INDEX -1
+#define NOT_A_FREQUENCY -1
#define KEYCODE_SPACE ' '
@@ -185,7 +190,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
-#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
+#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
@@ -204,7 +209,11 @@ static void dumpWord(const unsigned short* word, const int length) {
// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
// for better performance.
-#define SUB_QUEUE_MAX_WORDS 5
+// Holds up to 1 candidate for each word
+#define SUB_QUEUE_MAX_WORDS 1
+#define SUB_QUEUE_MAX_COUNT 10
+
+#define TWO_WORDS_CORRECTION_THRESHOLD 0.22f
#define MAX_DEPTH_MULTIPLIER 3
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index e3673d425..822c2151d 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -33,9 +33,9 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
if (DEBUG_DICT) {
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
- LOGI("Max word length (%d) is greater than %d",
+ AKLOGI("Max word length (%d) is greater than %d",
maxWordLength, MAX_WORD_LENGTH_INTERNAL);
- LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
+ AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
}
}
mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 79d377a4f..90d7148d5 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -56,16 +56,7 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
- static unsigned short getChar(const unsigned char *dict, int *pos);
- static int getCount(const unsigned char *dict, int *pos);
- static bool getTerminal(const unsigned char *dict, int *pos);
- static int getAddress(const unsigned char *dict, int *pos);
- static int getFreq(const unsigned char *dict, const bool isLatestDictVersion, int *pos);
static int wideStrLen(unsigned short *str);
- // returns next sibling's position
- static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
- const int pos, unsigned short *c, int *childrenPosition,
- bool *terminal, int *freq);
private:
bool hasBigram();
@@ -87,56 +78,6 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
-inline unsigned short Dictionary::getChar(const unsigned char *dict, int *pos) {
- unsigned short ch = (unsigned short) (dict[(*pos)++] & 0xFF);
- // If the code is 255, then actual 16 bit code follows (in big endian)
- if (ch == 0xFF) {
- ch = ((dict[*pos] & 0xFF) << 8) | (dict[*pos + 1] & 0xFF);
- (*pos) += 2;
- }
- return ch;
-}
-
-inline int Dictionary::getCount(const unsigned char *dict, int *pos) {
- return dict[(*pos)++] & 0xFF;
-}
-
-inline bool Dictionary::getTerminal(const unsigned char *dict, int *pos) {
- return (dict[*pos] & FLAG_TERMINAL_MASK) > 0;
-}
-
-inline int Dictionary::getAddress(const unsigned char *dict, int *pos) {
- int address = 0;
- if ((dict[*pos] & FLAG_ADDRESS_MASK) == 0) {
- *pos += 1;
- } else {
- address += (dict[*pos] & (ADDRESS_MASK >> 16)) << 16;
- address += (dict[*pos + 1] & 0xFF) << 8;
- address += (dict[*pos + 2] & 0xFF);
- *pos += 3;
- }
- return address;
-}
-
-inline int Dictionary::getFreq(const unsigned char *dict,
- const bool isLatestDictVersion, int *pos) {
- int freq = dict[(*pos)++] & 0xFF;
- if (isLatestDictVersion) {
- // skipping bigram
- int bigramExist = (dict[*pos] & FLAG_BIGRAM_READ);
- if (bigramExist > 0) {
- int nextBigramExist = 1;
- while (nextBigramExist > 0) {
- (*pos) += 3;
- nextBigramExist = (dict[(*pos)++] & FLAG_BIGRAM_CONTINUED);
- }
- } else {
- (*pos)++;
- }
- }
- return freq;
-}
-
inline int Dictionary::wideStrLen(unsigned short *str) {
if (!str) return 0;
unsigned short *end = str;
@@ -144,22 +85,6 @@ inline int Dictionary::wideStrLen(unsigned short *str) {
end++;
return end - str;
}
-
-inline int Dictionary::setDictionaryValues(const unsigned char *dict,
- const bool isLatestDictVersion, const int pos, unsigned short *c,int *childrenPosition,
- bool *terminal, int *freq) {
- int position = pos;
- // -- at char
- *c = Dictionary::getChar(dict, &position);
- // -- at flag/add
- *terminal = Dictionary::getTerminal(dict, &position);
- *childrenPosition = Dictionary::getAddress(dict, &position);
- // -- after address or flag
- *freq = (*terminal) ? Dictionary::getFreq(dict, isLatestDictVersion, &position) : 1;
- // returns next sibling's position
- return position;
-}
-
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index 95e35263b..b91957c77 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -52,7 +52,7 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard
const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
mProximityCharsArray = new uint32_t[proximityGridLength];
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Create proximity info array %d", proximityGridLength);
+ AKLOGI("Create proximity info array %d", proximityGridLength);
}
memcpy(mProximityCharsArray, proximityCharsArray,
proximityGridLength * sizeof(mProximityCharsArray[0]));
@@ -102,7 +102,7 @@ inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y)
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) {
if (DEBUG_DICT) {
- LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
+ AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
assert(false);
}
return false;
@@ -110,11 +110,11 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
const int startIndex = getStartIndexFromCoordinates(x, y);
if (DEBUG_PROXIMITY_INFO) {
- LOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
+ AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Index: %d", mProximityCharsArray[startIndex + i]);
+ AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]);
}
if (mProximityCharsArray[startIndex + i] == KEYCODE_SPACE) {
return true;
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index e95e03ce5..8be95bc40 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -47,7 +47,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
if (DEBUG_DICT) {
- LOGI("UnigramDictionary - constructor");
+ AKLOGI("UnigramDictionary - constructor");
}
}
@@ -163,14 +163,14 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) {
- LOGI("Returning %d words", suggestedWordsCount);
+ AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- LOGI("%s %i", s, frequencies[j]);
+ AKLOGI("%s %i", s, frequencies[j]);
#endif
}
}
@@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_OPEN;
PROF_START(0);
- // Note: This line is intentionally left blank
+ queuePool->clearAll();
PROF_END(0);
PROF_START(1);
@@ -213,7 +213,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
&& inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
for (int i = 1; i < inputLength; ++i) {
if (DEBUG_DICT) {
- LOGI("--- Suggest missing space characters %d", i);
+ AKLOGI("--- Suggest missing space characters %d", i);
}
getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, i, correction, queuePool);
@@ -226,12 +226,12 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
// The first and last "mistyped spaces" are taken care of by excessive character handling
for (int i = 1; i < inputLength - 1; ++i) {
if (DEBUG_DICT) {
- LOGI("--- Suggest words with proximity space %d", i);
+ AKLOGI("--- Suggest words with proximity space %d", i);
}
const int x = xcoordinates[i];
const int y = ycoordinates[i];
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
+ AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
i, x, y, proximityInfo->hasSpaceProximity(x, y));
}
if (proximityInfo->hasSpaceProximity(x, y)) {
@@ -241,18 +241,33 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
}
PROF_END(6);
+ if (DEBUG_DICT) {
+ queuePool->dumpSubQueue1TopSuggestions();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ WordsPriorityQueue* queue = queuePool->getSubQueue1(i);
+ if (queue->size() > 0) {
+ WordsPriorityQueue::SuggestedWord* sw = queue->top();
+ const int score = sw->mScore;
+ const unsigned short* word = sw->mWord;
+ const int wordLength = sw->mWordLength;
+ double ns = Correction::RankingAlgorithm::calcNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
+ ns += 0;
+ AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
+ (ns > TWO_WORDS_CORRECTION_THRESHOLD));
+ DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
+ DUMP_WORD(word, wordLength);
+ }
+ }
+ }
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int inputLength,
- WordsPriorityQueue *queue, Correction *correction) {
+ const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
if (DEBUG_DICT) {
- LOGI("initSuggest");
+ AKLOGI("initSuggest");
}
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
- if (queue) {
- queue->clear();
- }
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
@@ -264,15 +279,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool *queuePool) {
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- initSuggestions(
- proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
- getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+ getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
}
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
- const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0,
@@ -280,7 +293,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
- int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
+ int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition);
int outputIndex = 0;
correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0));
@@ -292,7 +305,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- correction, &childCount, &firstChildPos, &siblingPos, queue);
+ correction, &childCount, &firstChildPos, &siblingPos, queuePool);
// Update next sibling pos
correction->setTreeSiblingPos(outputIndex, siblingPos);
@@ -327,14 +340,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
inline void UnigramDictionary::onTerminal(const int freq,
const TerminalAttributes& terminalAttributes, Correction *correction,
- WordsPriorityQueue *queue) {
+ WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
+ const int inputIndex = correction->getInputIndex();
+ const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
+ if (!addToMasterQueue && !addToSubQueue) {
+ return;
+ }
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
- if (finalFreq >= 0) {
+ if (finalFreq != NOT_A_FREQUENCY) {
if (!terminalAttributes.isShortcutOnly()) {
- addWord(wordPointer, wordLength, finalFreq, queue);
+ if (addToMasterQueue) {
+ addWord(wordPointer, wordLength, finalFreq, masterQueue);
+ }
+ // TODO: Check the validity of "inputIndex == wordLength"
+ //if (addToSubQueue && inputIndex == wordLength) {
+ if (addToSubQueue) {
+ addWord(wordPointer, wordLength, finalFreq, subQueue);
+ }
+ }
+ // Please note that the shortcut candidates will be added to the master queue only.
+ if (!addToMasterQueue) {
+ return;
}
+
+ // From here, below is the code to add shortcut candidates.
TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
while (iterator.hasNextShortcutTarget()) {
// TODO: addWord only supports weak ordering, meaning we have no means to control the
@@ -345,7 +378,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
- addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
}
}
}
@@ -390,7 +423,81 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
const int firstFreq = getMostFrequentWordLike(
firstWordStartPos, firstWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
- LOGI("First freq: %d", firstFreq);
+ AKLOGI("First freq: %d", firstFreq);
+ }
+ if (firstFreq <= 0) return;
+
+ for (int i = 0; i < firstWordLength; ++i) {
+ word[i] = mWord[i];
+ }
+
+ const int secondFreq = getMostFrequentWordLike(
+ secondWordStartPos, secondWordLength, proximityInfo, mWord);
+ if (DEBUG_DICT) {
+ AKLOGI("Second freq: %d", secondFreq);
+ }
+ if (secondFreq <= 0) return;
+
+ word[firstWordLength] = SPACE;
+ for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
+ word[i] = mWord[i - firstWordLength - 1];
+ }
+
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ if (DEBUG_DICT) {
+ AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ }
+ addWord(word, newWordLength, pairFreq, masterQueue);
+ return;
+}
+
+void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int firstWordStartPos = 0;
+ const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity
+ ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+
+ if (inputLength >= MAX_WORD_LENGTH) return;
+ if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
+ || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
+ return;
+
+ const int newWordLength = firstWordLength + secondWordLength + 1;
+
+
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
+ //correction);
+ //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
+ //MAX_ERRORS_FOR_TWO_WORDS);
+
+ // Allocating variable length array on stack
+ unsigned short word[newWordLength];
+ const int firstFreq = getMostFrequentWordLike(
+ firstWordStartPos, firstWordLength, proximityInfo, mWord);
+ if (DEBUG_DICT) {
+ AKLOGI("First freq: %d", firstFreq);
}
if (firstFreq <= 0) return;
@@ -401,7 +508,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
const int secondFreq = getMostFrequentWordLike(
secondWordStartPos, secondWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
- LOGI("Second freq: %d", secondFreq);
+ AKLOGI("Second freq: %d", secondFreq);
}
if (secondFreq <= 0) return;
@@ -411,15 +518,14 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
}
// TODO: Remove initSuggestions and correction->setCorrectionParams
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
- 0 /* do not clear queue */, correction);
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) {
- LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
addWord(word, newWordLength, pairFreq, masterQueue);
return;
@@ -507,9 +613,10 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
int maxFreq = -1;
const uint8_t* const root = DICT_ROOT;
- mStackChildCount[0] = root[0];
+ int startPos = 0;
+ mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
mStackInputIndex[0] = 0;
- mStackSiblingPos[0] = 1;
+ mStackSiblingPos[0] = startPos;
while (depth >= 0) {
const int charGroupCount = mStackChildCount[depth];
int pos = mStackSiblingPos[depth];
@@ -583,7 +690,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
+ int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -658,14 +765,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
} while (NOT_A_CHARACTER != c);
if (isTerminalNode) {
- if (needsToInvokeOnTerminal) {
- // The frequency should be here, because we come here only if this is actually
- // a terminal node, and we are on its last char.
- const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- TerminalAttributes terminalAttributes(DICT_ROOT, flags,
- BinaryFormat::skipFrequency(flags, pos));
- onTerminal(freq, terminalAttributes, correction, queue);
- }
+ // The frequency should be here, because we come here only if this is actually
+ // a terminal node, and we are on its last char.
+ const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+ const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+ const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+ onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
// If there are more chars in this node, then this virtual node has children.
// If we are on the last char, this virtual node has children if this node has.
@@ -690,7 +796,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
*nextSiblingPosition =
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
if (DEBUG_DICT_FULL) {
- LOGI("Traversing was pruned.");
+ AKLOGI("Traversing was pruned.");
}
return false;
}
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 23581425a..b950971bb 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -93,18 +93,21 @@ class UnigramDictionary {
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- WordsPriorityQueue *queue, Correction *correction);
+ const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction,
- WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+ WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
@@ -114,12 +117,12 @@ class UnigramDictionary {
const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool);
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
- Correction *correction, WordsPriorityQueue *queue);
+ Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index 84f2523c2..6262439b5 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -48,6 +48,7 @@ class WordsPriorityQueue {
mSuggestedWords[i].mUsed = false;
}
}
+
~WordsPriorityQueue() {
delete[] mSuggestedWords;
}
@@ -70,20 +71,19 @@ class WordsPriorityQueue {
sw->setParams(score, word, wordLength);
}
if (sw == 0) {
- LOGE("SuggestedWord is accidentally null.");
+ AKLOGE("SuggestedWord is accidentally null.");
return;
}
if (DEBUG_WORDS_PRIORITY_QUEUE) {
- LOGI("Push word. %d, %d", score, wordLength);
+ AKLOGI("Push word. %d, %d", score, wordLength);
DUMP_WORD(word, wordLength);
}
mSuggestions.push(sw);
}
- SuggestedWord* topAndPop() {
+ SuggestedWord* top() {
if (mSuggestions.empty()) return 0;
SuggestedWord* sw = mSuggestions.top();
- mSuggestions.pop();
return sw;
}
@@ -93,7 +93,7 @@ class WordsPriorityQueue {
while (!mSuggestions.empty() && index >= 0) {
SuggestedWord* sw = mSuggestions.top();
if (DEBUG_WORDS_PRIORITY_QUEUE) {
- LOGI("dump word. %d", sw->mScore);
+ AKLOGI("dump word. %d", sw->mScore);
DUMP_WORD(sw->mWord, sw->mWordLength);
}
const unsigned int wordLength = sw->mWordLength;
@@ -111,7 +111,7 @@ class WordsPriorityQueue {
return size;
}
- int size() {
+ int size() const {
return mSuggestions.size();
}
@@ -119,7 +119,7 @@ class WordsPriorityQueue {
while (!mSuggestions.empty()) {
SuggestedWord* sw = mSuggestions.top();
if (DEBUG_WORDS_PRIORITY_QUEUE) {
- LOGI("Clear word. %d", sw->mScore);
+ AKLOGI("Clear word. %d", sw->mScore);
DUMP_WORD(sw->mWord, sw->mWordLength);
}
sw->mUsed = false;
@@ -127,6 +127,13 @@ class WordsPriorityQueue {
}
}
+ void dumpTopWord() {
+ if (size() <= 0) {
+ return;
+ }
+ DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
+ }
+
private:
struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index 386297650..5fa254852 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+#include <assert.h>
+#include <new>
#include "words_priority_queue.h"
namespace latinime {
@@ -24,30 +26,60 @@ namespace latinime {
class WordsPriorityQueuePool {
public:
WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
- mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
- mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
- mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
+ for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
+ ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
+ mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
+ WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
+ WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ }
}
- ~WordsPriorityQueuePool() {
- delete mMasterQueue;
+ virtual ~WordsPriorityQueuePool() {
}
WordsPriorityQueue* getMasterQueue() {
return mMasterQueue;
}
+
// TODO: Come up with more generic pool
- WordsPriorityQueue* getSubQueue1() {
- return mSubQueue1;
+ WordsPriorityQueue* getSubQueue1(const int id) {
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT);
+ }
+ return mSubQueues1[id];
+ }
+
+ WordsPriorityQueue* getSubQueue2(const int id) {
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT);
+ }
+ return mSubQueues2[id];
}
- WordsPriorityQueue* getSubQueue2() {
- return mSubQueue2;
+
+ inline void clearAll() {
+ mMasterQueue->clear();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->clear();
+ mSubQueues2[i]->clear();
+ }
+ }
+
+ void dumpSubQueue1TopSuggestions() {
+ AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->dumpTopWord();
+ }
}
private:
- WordsPriorityQueue *mMasterQueue;
- WordsPriorityQueue *mSubQueue1;
- WordsPriorityQueue *mSubQueue2;
+ WordsPriorityQueue* mMasterQueue;
+ WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
+ WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
+ char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
};
}