1 files changed, 118 insertions, 0 deletions
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
new file mode 100644
index 000000000..90c98149b
--- /dev/null
+++ b/native/src/unigram_dictionary.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_DICTIONARY_H
+#define LATINIME_UNIGRAM_DICTIONARY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class UnigramDictionary {
+
+    typedef enum {                             // Used as a return value for character comparison
+        SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,  // Same char, possibly with different case or accent
+        NEAR_PROXIMITY_CHAR,                   // It is a char located nearby on the keyboard
+        UNRELATED_CHAR                         // It is an unrelated char
+    } ProximityType;
+
+public:
+    UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
+            int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
+    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+            int *nextLetters, int nextLettersSize);
+    ~UnigramDictionary();
+
+private:
+    void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
+    void getSuggestionCandidates(const int skipPos, const int excessivePos,
+            const int transposedPos, int *nextLetters, const int nextLettersSize,
+            const int maxDepth);
+    void getVersionNumber();
+    bool checkIfDictVersionIsLatest();
+    int getAddress(int *pos);
+    int getFreq(int *pos);
+    int wideStrLen(unsigned short *str);
+    bool sameAsTyped(unsigned short *word, int length);
+    bool addWord(unsigned short *word, int length, int frequency);
+    unsigned short toLowerCase(unsigned short c);
+    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
+            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
+            const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
+            const int nextLettersSize);
+    bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
+    // Keep getWordsOld for comparing performance between getWords and getWordsOld
+    void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
+            const int excessivePos, const int transposedPos, int *nextLetters,
+            const int nextLettersSize);
+    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
+    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
+            const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
+    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
+            const int inputIndex, const int depth, const int snr, int *nextLetters,
+            const int nextLettersSize, const int skipPos, const int excessivePos,
+            const int transposedPos, const int freq);
+    void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
+            const int inputIndex, const int depth, const int snr, const int skipPos,
+            const int excessivePos, const int transposedPos, const int freq);
+    bool needsToSkipCurrentNode(const unsigned short c,
+            const int inputIndex, const int skipPos, const int depth);
+    ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
+            const int skipPos, const int excessivePos, const int transposedPos);
+    // Process a node by considering proximity, missing and excessive character
+    bool processCurrentNode(const int pos, const int depth,
+            const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
+            const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+            int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
+            bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
+            int *nextSiblingPosition);
+    int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
+    // Process a node by considering missing space
+    bool processCurrentNodeForExactMatch(const int firstChildPos,
+            const int startInputIndex, const int depth, unsigned short *word,
+            int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
+    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
+    int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);}
+    const unsigned char *DICT;
+    const int MAX_WORD_LENGTH;
+    const int MAX_WORDS;
+    const int MAX_PROXIMITY_CHARS;
+    const bool IS_LATEST_DICT_VERSION;
+    const int TYPED_LETTER_MULTIPLIER;
+    const int FULL_WORD_MULTIPLIER;
+    const int ROOT_POS;
+
+    int *mFrequencies;
+    unsigned short *mOutputChars;
+    int *mInputCodes;
+    int mInputLength;
+    // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
+    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+    int mMaxEditDistance;
+
+    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
+    bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
+    int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
+    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
+    int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
+    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace latinime
+
+#endif // LATINIME_UNIGRAM_DICTIONARY_H