aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.h')
-rw-r--r-- native/src/unigram_dictionary.h 148
1 files changed, 148 insertions, 0 deletions
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
new file mode 100644
index 000000000..3d3007ce0
--- /dev/null
+++ b/native/src/unigram_dictionary.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_DICTIONARY_H
+#define LATINIME_UNIGRAM_DICTIONARY_H
+
+#include "defines.h"
+#include "proximity_info.h"
+
+namespace latinime {
+
+class UnigramDictionary { // Dictionary lookup class; getSuggestions() is its only public query
+
+ typedef enum { // Return value for character comparison (see getMatchedProximityId)
+ SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR, // Same char, possibly with different case or accent
+ NEAR_PROXIMITY_CHAR, // It is a char located nearby on the keyboard
+ UNRELATED_CHAR // It is an unrelated char
+ } ProximityType; // Private: declared before the first access specifier
+
+public:
+ UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
+ int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
+ int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const int codesSize, const int flags,
+ unsigned short *outWords, int *frequencies); // outWords/frequencies are the only non-const buffers
+ ~UnigramDictionary(); // NOTE(review): what (if anything) is released is not visible in this header
+
+private:
+ void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const int codesSize,
+ unsigned short *outWords, int *frequencies); // Same parameters as getSuggestions() minus flags
+ bool isDigraph(const int* codes, const int i, const int codesSize) const; // const query, no mutation
+ void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo, // takes flags as well
+ const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
+ const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
+ const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
+ void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
+ int *frequencies); // presumably stores these in mInputCodes/mOutputChars/... - TODO confirm
+ void getSuggestionCandidates(const int skipPos, const int excessivePos,
+ const int transposedPos, int *nextLetters, const int nextLettersSize,
+ const int maxDepth);
+ void getVersionNumber(); // Returns void despite the name; where the result goes is in the .cpp
+ bool checkIfDictVersionIsLatest();
+ int getAddress(int *pos); // pos is passed by pointer; presumably advanced by the call - confirm
+ int getFreq(int *pos); // same in/out pos convention as getAddress - confirm in .cpp
+ int wideStrLen(unsigned short *str); // NOTE(review): terminator convention not visible here
+ bool sameAsTyped(unsigned short *word, int length);
+ bool addWord(unsigned short *word, int length, int frequency);
+ unsigned short toBaseLowerCase(unsigned short c);
+ void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
+ const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
+ const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
+ const int nextLettersSize);
+ bool getSplitTwoWordsSuggestion(const int inputLength,
+ const int firstWordStartPos, const int firstWordLength,
+ const int secondWordStartPos, const int secondWordLength);
+ bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
+ bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
+ // Kept only to compare performance of getWords vs getWordsOld (no getWords is declared here)
+ void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
+ const int excessivePos, const int transposedPos, int *nextLetters,
+ const int nextLettersSize);
+ void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
+ int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
+ const int excessivePos, const int transposedPos, const int freq,
+ const bool sameLength) const; // const: does not modify the object
+ void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
+ const int inputIndex, const int depth, const int snr, int *nextLetters,
+ const int nextLettersSize, const int skipPos, const int excessivePos,
+ const int transposedPos, const int freq);
+ void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
+ const int inputIndex, const int depth, const int snr, const int skipPos,
+ const int excessivePos, const int transposedPos, const int freq);
+ bool needsToSkipCurrentNode(const unsigned short c,
+ const int inputIndex, const int skipPos, const int depth);
+ ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
+ const int skipPos, const int excessivePos, const int transposedPos);
+ // Process a node by considering proximity, missing and excessive character
+ bool processCurrentNode(const int pos, const int depth,
+ const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
+ const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+ int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
+ bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
+ int *nextSiblingPosition); // the new*/next* pointer parameters are outputs (non-const)
+ int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
+ // Process a node by considering missing space
+ bool processCurrentNodeForExactMatch(const int firstChildPos,
+ const int startInputIndex, const int depth, unsigned short *word,
+ int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
+ bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
+ inline const int* getInputCharsAt(const int index) const {
+ return mInputCodes + (index * MAX_PROXIMITY_CHARS); // MAX_PROXIMITY_CHARS codes per input index
+ }
+ const unsigned char *DICT; // Pointer to const bytes; the pointer member itself is not const
+ const int MAX_WORD_LENGTH; // const members below must be set in the ctor initializer list
+ const int MAX_WORDS;
+ const int MAX_PROXIMITY_CHARS; // Stride used by getInputCharsAt()
+ const bool IS_LATEST_DICT_VERSION;
+ const int TYPED_LETTER_MULTIPLIER; // NOTE(review): ctor param is spelled "typedLetterMultipler"
+ const int FULL_WORD_MULTIPLIER;
+ const int ROOT_POS; // No matching ctor parameter; value is chosen in the .cpp
+ const unsigned int BYTES_IN_ONE_CHAR; // No matching ctor parameter; value is chosen in the .cpp
+ const int MAX_UMLAUT_SEARCH_DEPTH; // No matching ctor parameter; value is chosen in the .cpp
+
+ // Flags for special processing
+ // Those *must* match the flags in BinaryDictionary.Flags.ALL_FLAGS in BinaryDictionary.java
+ // or something very bad (like, the apocalypse) will happen.
+ // Please update both at the same time.
+ enum {
+ REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1 // Only flag bit declared in this header
+ };
+ static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; // defined in .cpp
+
+ int *mFrequencies; // NOTE(review): raw pointer; ownership is not visible from this header
+ unsigned short *mOutputChars; // NOTE(review): raw pointer; ownership is not visible here
+ const int *mInputCodes; // Read in MAX_PROXIMITY_CHARS-sized slots via getInputCharsAt()
+ int mInputLength;
+ // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mMaxEditDistance;
+
+ int mStackChildCount[MAX_WORD_LENGTH_INTERNAL]; // mStack*: presumably one slot per depth - confirm
+ bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
+ int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
+ int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
+ int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
+ int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+ int mNextLettersFrequency[NEXT_LETTERS_SIZE]; // Sized by NEXT_LETTERS_SIZE (not declared in this file)
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace latinime
+
+#endif // LATINIME_UNIGRAM_DICTIONARY_H