1 files changed, 148 insertions, 0 deletions
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
new file mode 100644
index 000000000..3d3007ce0
--- /dev/null
+++ b/native/src/unigram_dictionary.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_DICTIONARY_H
+#define LATINIME_UNIGRAM_DICTIONARY_H
+
+#include "defines.h"
+#include "proximity_info.h"
+
+namespace latinime {
+
+class UnigramDictionary {
+
+    typedef enum {                             // Used as a return value for character comparison
+        SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,  // Same char, possibly with different case or accent
+        NEAR_PROXIMITY_CHAR,                   // It is a char located nearby on the keyboard
+        UNRELATED_CHAR                         // It is an unrelated char
+    } ProximityType;
+
+public:
+    UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
+            int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
+    int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+            const int *ycoordinates, const int *codes, const int codesSize, const int flags,
+            unsigned short *outWords, int *frequencies);
+    ~UnigramDictionary();
+
+private:
+    void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+            const int *ycoordinates, const int *codes, const int codesSize,
+            unsigned short *outWords, int *frequencies);
+    bool isDigraph(const int* codes, const int i, const int codesSize) const;
+    void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
+        const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
+        const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
+        const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
+    void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
+            int *frequencies);
+    void getSuggestionCandidates(const int skipPos, const int excessivePos,
+            const int transposedPos, int *nextLetters, const int nextLettersSize,
+            const int maxDepth);
+    void getVersionNumber();
+    bool checkIfDictVersionIsLatest();
+    int getAddress(int *pos);
+    int getFreq(int *pos);
+    int wideStrLen(unsigned short *str);
+    bool sameAsTyped(unsigned short *word, int length);
+    bool addWord(unsigned short *word, int length, int frequency);
+    unsigned short toBaseLowerCase(unsigned short c);
+    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
+            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
+            const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
+            const int nextLettersSize);
+    bool getSplitTwoWordsSuggestion(const int inputLength,
+            const int firstWordStartPos, const int firstWordLength,
+            const int secondWordStartPos, const int secondWordLength);
+    bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
+    bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
+    // Keep getWordsOld for comparing performance between getWords and getWordsOld
+    void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
+            const int excessivePos, const int transposedPos, int *nextLetters,
+            const int nextLettersSize);
+    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
+    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
+            const int excessivePos, const int transposedPos, const int freq,
+            const bool sameLength) const;
+    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
+            const int inputIndex, const int depth, const int snr, int *nextLetters,
+            const int nextLettersSize, const int skipPos, const int excessivePos,
+            const int transposedPos, const int freq);
+    void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
+            const int inputIndex, const int depth, const int snr, const int skipPos,
+            const int excessivePos, const int transposedPos, const int freq);
+    bool needsToSkipCurrentNode(const unsigned short c,
+            const int inputIndex, const int skipPos, const int depth);
+    ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
+            const int skipPos, const int excessivePos, const int transposedPos);
+    // Process a node by considering proximity, missing and excessive character
+    bool processCurrentNode(const int pos, const int depth,
+            const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
+            const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+            int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
+            bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
+            int *nextSiblingPosition);
+    int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
+    // Process a node by considering missing space
+    bool processCurrentNodeForExactMatch(const int firstChildPos,
+            const int startInputIndex, const int depth, unsigned short *word,
+            int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
+    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
+    inline const int* getInputCharsAt(const int index) const {
+        return mInputCodes + (index * MAX_PROXIMITY_CHARS);
+    }
+    const unsigned char *DICT;
+    const int MAX_WORD_LENGTH;
+    const int MAX_WORDS;
+    const int MAX_PROXIMITY_CHARS;
+    const bool IS_LATEST_DICT_VERSION;
+    const int TYPED_LETTER_MULTIPLIER;
+    const int FULL_WORD_MULTIPLIER;
+    const int ROOT_POS;
+    const unsigned int BYTES_IN_ONE_CHAR;
+    const int MAX_UMLAUT_SEARCH_DEPTH;
+
+    // Flags for special processing
+    // Those *must* match the flags in BinaryDictionary.Flags.ALL_FLAGS in BinaryDictionary.java
+    // or something very bad (like, the apocalypse) will happen.
+    // Please update both at the same time.
+    enum {
+        REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1
+    };
+    static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
+
+    int *mFrequencies;
+    unsigned short *mOutputChars;
+    const int *mInputCodes;
+    int mInputLength;
+    // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
+    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+    int mMaxEditDistance;
+
+    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
+    bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
+    int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
+    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
+    int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
+    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+    int mNextLettersFrequency[NEXT_LETTERS_SIZE];
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace latinime
+
+#endif // LATINIME_UNIGRAM_DICTIONARY_H