aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.h')
-rw-r--r-- native/src/unigram_dictionary.h 118
1 files changed, 118 insertions, 0 deletions
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
new file mode 100644
index 000000000..90c98149b
--- /dev/null
+++ b/native/src/unigram_dictionary.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_DICTIONARY_H
+#define LATINIME_UNIGRAM_DICTIONARY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class UnigramDictionary { // Declarations only: apart from getInputCharsAt(), every member function is defined out of line
+
+ typedef enum { // Return type of getMatchedProximityId(); classifies a character comparison. Private: declared before 'public:'
+ SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR, // Same char, possibly with different case or accent
+ NEAR_PROXIMITY_CHAR, // It is a char located nearby on the keyboard
+ UNRELATED_CHAR // It is an unrelated char
+ } ProximityType;
+
+public:
+ UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
+ int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); // NOTE(review): 'typedLetterMultipler' is misspelled; parameters presumably seed the similarly named const members - confirm in the implementation
+ int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+ int *nextLetters, int nextLettersSize); // Only public query entry point; meaning of the int result (presumably a suggestion count) is not visible here - confirm
+ ~UnigramDictionary(); // NOTE(review): class stores raw pointers; what (if anything) is released is decided by the out-of-line definition
+
+private:
+ void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies); // Same buffer parameters as getSuggestions() minus the next-letter arguments
+ void getSuggestionCandidates(const int skipPos, const int excessivePos,
+ const int transposedPos, int *nextLetters, const int nextLettersSize,
+ const int maxDepth);
+ void getVersionNumber(); // NOTE(review): returns void despite the "get" name; effect is defined out of line
+ bool checkIfDictVersionIsLatest();
+ int getAddress(int *pos); // Takes pos by pointer; presumably advances it while decoding - confirm
+ int getFreq(int *pos); // Takes pos by pointer; presumably advances it while decoding - confirm
+ int wideStrLen(unsigned short *str); // presumably the length of a 0-terminated 16-bit string - confirm
+ bool sameAsTyped(unsigned short *word, int length);
+ bool addWord(unsigned short *word, int length, int frequency);
+ unsigned short toLowerCase(unsigned short c);
+ void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
+ const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
+ const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
+ const int nextLettersSize); // Name suggests a recursive counterpart of getWordsOld() - confirm in the implementation
+ bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
+ // Keep getWordsOld for comparing performance between getWords and getWordsOld (NOTE(review): no 'getWords' is declared here; presumably getWordsRec is meant)
+ void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
+ const int excessivePos, const int transposedPos, int *nextLetters,
+ const int nextLettersSize);
+ void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
+ int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
+ const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
+ void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
+ const int inputIndex, const int depth, const int snr, int *nextLetters,
+ const int nextLettersSize, const int skipPos, const int excessivePos,
+ const int transposedPos, const int freq);
+ void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
+ const int inputIndex, const int depth, const int snr, const int skipPos,
+ const int excessivePos, const int transposedPos, const int freq);
+ bool needsToSkipCurrentNode(const unsigned short c,
+ const int inputIndex, const int skipPos, const int depth);
+ ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
+ const int skipPos, const int excessivePos, const int transposedPos); // Only declaration in this header that uses ProximityType
+ // Process a node by considering proximity, missing and excessive character; results are written through the trailing new*/next* pointer parameters
+ bool processCurrentNode(const int pos, const int depth,
+ const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
+ const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+ int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
+ bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
+ int *nextSiblingPosition);
+ int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
+ // Process a node by considering missing space; results are written through the trailing pointer parameters
+ bool processCurrentNodeForExactMatch(const int firstChildPos,
+ const int startInputIndex, const int depth, unsigned short *word,
+ int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
+ bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
+ int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);} // Pointer to element index * MAX_PROXIMITY_CHARS of mInputCodes; no bounds or null check is done here
+ const unsigned char *DICT; // Pointer to const bytes (the pointer itself is not const); ownership is not visible from this header
+ const int MAX_WORD_LENGTH; // const data members have no in-class initializer, so the constructor's initializer list must set each one
+ const int MAX_WORDS;
+ const int MAX_PROXIMITY_CHARS; // Stride used by getInputCharsAt()
+ const bool IS_LATEST_DICT_VERSION;
+ const int TYPED_LETTER_MULTIPLIER;
+ const int FULL_WORD_MULTIPLIER;
+ const int ROOT_POS; // No same-named constructor parameter; its value is chosen by the out-of-line constructor
+
+ int *mFrequencies; // presumably the 'frequencies' buffer handed to getSuggestions() - confirm
+ unsigned short *mOutputChars; // presumably the 'outWords' buffer handed to getSuggestions() - confirm
+ int *mInputCodes; // Read by getInputCharsAt() in strides of MAX_PROXIMITY_CHARS
+ int mInputLength;
+ // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH (it is not declared in this header; presumably it comes from defines.h)
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mMaxEditDistance;
+
+ int mStackChildCount[MAX_WORD_LENGTH_INTERNAL]; // Six fixed-size arrays of MAX_WORD_LENGTH_INTERNAL entries follow; presumably per-depth traversal state - confirm in the implementation
+ bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
+ int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
+ int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
+ int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
+ int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace latinime
+
+#endif // LATINIME_UNIGRAM_DICTIONARY_H