aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/src/bigram_dictionary.h4
-rw-r--r--native/src/binary_format.h4
-rw-r--r--native/src/correction.h8
-rw-r--r--native/src/dictionary.h4
-rw-r--r--native/src/proximity_info.h4
-rw-r--r--native/src/terminal_attributes.h78
-rw-r--r--native/src/unigram_dictionary.cpp26
-rw-r--r--native/src/unigram_dictionary.h11
-rw-r--r--native/src/words_priority_queue.h5
-rw-r--r--native/src/words_priority_queue_pool.h5
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java120
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java3
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java55
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/Word.java7
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java3
-rw-r--r--tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java10
16 files changed, 278 insertions, 69 deletions
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h
index c07458a38..585a1866a 100644
--- a/native/src/bigram_dictionary.h
+++ b/native/src/bigram_dictionary.h
@@ -21,14 +21,14 @@ namespace latinime {
class Dictionary;
class BigramDictionary {
-public:
+ public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives,
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
~BigramDictionary();
-private:
+ private:
bool addWordBigram(unsigned short *word, int length, int frequency);
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index cbaccb295..9944fa2bd 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -22,12 +22,12 @@
namespace latinime {
class BinaryFormat {
-private:
+ private:
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-public:
+ public:
const static int UNKNOWN_FORMAT = -1;
const static int FORMAT_VERSION_1 = 1;
const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
diff --git a/native/src/correction.h b/native/src/correction.h
index e55be8dd6..9ba472955 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -27,8 +27,7 @@ namespace latinime {
class ProximityInfo;
class Correction {
-
-public:
+ public:
typedef enum {
TRAVERSE_ALL_ON_TERMINAL,
TRAVERSE_ALL_NOT_ON_TERMINAL,
@@ -95,7 +94,8 @@ public:
inline int getTreeParentIndex(const int index) const {
return mCorrectionStates[index].mParentIndex;
}
-private:
+
+ private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes();
@@ -154,7 +154,7 @@ private:
bool mSkipping;
class RankingAlgorithm {
- public:
+ public:
static int calculateFinalFreq(const int inputIndex, const int depth,
const int freq, int *editDistanceTable, const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 52048ecca..79d377a4f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -28,7 +28,7 @@
namespace latinime {
class Dictionary {
-public:
+ public:
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
@@ -67,7 +67,7 @@ public:
const int pos, unsigned short *c, int *childrenPosition,
bool *terminal, int *freq);
-private:
+ private:
bool hasBigram();
const unsigned char *mDict;
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 832db1062..9ca5505a7 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -26,7 +26,7 @@ namespace latinime {
class Correction;
class ProximityInfo {
-public:
+ public:
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
@@ -68,7 +68,7 @@ public:
return mTouchPositionCorrectionEnabled;
}
-private:
+ private:
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h
new file mode 100644
index 000000000..1f9815936
--- /dev/null
+++ b/native/src/terminal_attributes.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
+#define LATINIME_TERMINAL_ATTRIBUTES_H
+
+#include "unigram_dictionary.h"
+
+namespace latinime {
+
+/**
+ * This class encapsulates information about a terminal node. It lets clients
+ * retrieve local node attributes, such as the list of shortcuts, without
+ * exposing the binary format structure to them.
+ */
+class TerminalAttributes {
+ public:
+ class ShortcutIterator { // Reads shortcut targets one at a time from the dictionary buffer.
+ const uint8_t* const mDict; // Dictionary buffer handed to the BinaryFormat readers.
+ bool mHasNextShortcutTarget; // Whether another shortcut target remains to be read.
+ int mPos; // Read position in mDict; passed by address to the BinaryFormat readers.
+
+ public:
+ ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
+ mPos(pos) { // The node flags decide whether there is a first target to read at all.
+ mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ }
+
+ inline bool hasNextShortcutTarget() const { // Loop condition for callers (see onTerminal).
+ return mHasNextShortcutTarget;
+ }
+
+ // Reads the next shortcut target into outWord as a uint16_t string and refreshes the
+ // "has next" state. For parameters and return value see BinaryFormat::getWordAtAddress.
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
+ mHasNextShortcutTarget = // Taken from this entry's own attribute flags.
+ 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ int shortcutAddress = // Address of the target word, decoded from the attribute entry.
+ BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
+ return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
+ }
+ };
+
+ private:
+ const uint8_t* const mDict; // Dictionary buffer given at construction.
+ const uint8_t mFlags; // Node flags given at construction; tested against FLAG_* bits.
+ const int mStartPos; // Position handed to every ShortcutIterator created from this object.
+
+ public:
+ TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
+ mDict(dict), mFlags(flags), mStartPos(pos) {
+ }
+
+ inline bool isShortcutOnly() const { // True when FLAG_IS_SHORTCUT_ONLY is set in the flags.
+ return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ inline ShortcutIterator getShortcutIterator() const { // Fresh iterator starting at mStartPos.
+ return ShortcutIterator(mDict, mStartPos, mFlags);
+ }
+};
+} // namespace latinime
+
+#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 7c3c35e40..e95e03ce5 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -324,13 +325,28 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
correction, queuePool);
}
-inline void UnigramDictionary::onTerminal(
- const int freq, Correction *correction, WordsPriorityQueue *queue) {
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueue *queue) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) { // A negative final frequency queues neither the word nor its shortcuts.
- addWord(wordPointer, wordLength, finalFreq, queue);
+ if (!terminalAttributes.isShortcutOnly()) { // Shortcut-only terminals are not queued as words.
+ addWord(wordPointer, wordLength, finalFreq, queue);
+ }
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) { // Queue every shortcut target of this terminal.
+ // TODO: addWord only supports weak ordering, meaning we have no means to control the
+ // order of the shortcuts relative to one another or to the word. We need to either
+ // modulate the frequency of each shortcut according to its own shortcut frequency or
+ // to make the queue so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); // Word's own score.
+ }
}
}
@@ -646,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, correction, queue);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags,
+ BinaryFormat::skipFrequency(flags, pos));
+ onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index afe92e5b9..23581425a 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -27,10 +27,9 @@
namespace latinime {
+class TerminalAttributes;
class UnigramDictionary {
-
-public:
-
+ public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -83,8 +82,7 @@ public:
unsigned short *outWords, int *frequencies);
virtual ~UnigramDictionary();
-private:
-
+ private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int inputLength,
const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
@@ -115,7 +113,8 @@ private:
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool);
- void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue);
+ void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ Correction *correction, WordsPriorityQueue *queue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index 2d6270977..84f2523c2 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -24,7 +24,7 @@
namespace latinime {
class WordsPriorityQueue {
-public:
+ public:
class SuggestedWord {
public:
int mScore;
@@ -126,7 +126,8 @@ public:
mSuggestions.pop();
}
}
-private:
+
+ private:
struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
return left->mScore > right->mScore;
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index d964bfc3b..386297650 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@@ -22,7 +22,7 @@
namespace latinime {
class WordsPriorityQueuePool {
-public:
+ public:
WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
@@ -43,7 +43,8 @@ public:
WordsPriorityQueue* getSubQueue2() {
return mSubQueue2;
}
-private:
+
+ private:
WordsPriorityQueue *mMasterQueue;
WordsPriorityQueue *mSubQueue1;
WordsPriorityQueue *mSubQueue2;
diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index 92f402d3e..b7826f065 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
@@ -26,6 +26,7 @@ import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
@@ -44,8 +45,9 @@ public class BinaryDictInputOutput {
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
* g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
- * | reserved 1 bit, 1 = yes, 0 = no
+ * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
+ * | is shortcut only ? 1 bit, 1 = yes, 0 = no : FLAG_IS_SHORTCUT_ONLY
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -71,6 +73,8 @@ public class BinaryDictInputOutput {
* d
* dress
*
+ * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
+ * | shortcut targets address list
* | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
* | bigrams address list
*
@@ -126,7 +130,9 @@ public class BinaryDictInputOutput {
private static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
private static final int FLAG_IS_TERMINAL = 0x10;
+ private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
private static final int FLAG_HAS_BIGRAMS = 0x04;
+ private static final int FLAG_IS_SHORTCUT_ONLY = 0x02;
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -271,10 +277,13 @@ public class BinaryDictInputOutput {
// If terminal, one byte for the frequency
if (group.isTerminal()) size += GROUP_FREQUENCY_SIZE;
size += GROUP_MAX_ADDRESS_SIZE; // For children address
+ if (null != group.mShortcutTargets) {
+ size += (GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
+ * group.mShortcutTargets.size();
+ }
if (null != group.mBigrams) {
- for (WeightedString bigram : group.mBigrams) {
- size += GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE;
- }
+ size += (GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
+ * group.mBigrams.size();
}
return size;
}
@@ -387,6 +396,15 @@ public class BinaryDictInputOutput {
final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
groupSize += getByteSize(offset);
}
+ if (null != group.mShortcutTargets) {
+ for (WeightedString target : group.mShortcutTargets) {
+ final int offsetBasePoint = groupSize + node.mCachedAddress + size
+ + GROUP_FLAGS_SIZE;
+ final int addressOfTarget = findAddressOfWord(dict, target.mWord);
+ final int offset = addressOfTarget - offsetBasePoint;
+ groupSize += getByteSize(offset) + GROUP_FLAGS_SIZE;
+ }
+ }
if (null != group.mBigrams) {
for (WeightedString bigram : group.mBigrams) {
final int offsetBasePoint = groupSize + node.mCachedAddress + size
@@ -545,7 +563,19 @@ public class BinaryDictInputOutput {
throw new RuntimeException("Node with a strange address");
}
}
- if (null != group.mBigrams) flags |= FLAG_HAS_BIGRAMS;
+ if (null != group.mShortcutTargets) {
+ if (0 == group.mShortcutTargets.size()) {
+ throw new RuntimeException("0-sized shortcut list must be null");
+ }
+ flags |= FLAG_HAS_SHORTCUT_TARGETS;
+ }
+ if (null != group.mBigrams) {
+ if (0 == group.mBigrams.size()) {
+ throw new RuntimeException("0-sized bigram list must be null");
+ }
+ flags |= FLAG_HAS_BIGRAMS;
+ }
+ // TODO: fill in the FLAG_IS_SHORTCUT_ONLY
return flags;
}
@@ -624,20 +654,36 @@ public class BinaryDictInputOutput {
index += shift;
groupAddress += shift;
+ // Write shortcuts
+ if (null != group.mShortcutTargets) {
+ final Iterator shortcutIterator = group.mShortcutTargets.iterator();
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = (WeightedString)shortcutIterator.next();
+ final int addressOfTarget = findAddressOfWord(dict, target.mWord);
+ ++groupAddress;
+ final int offset = addressOfTarget - groupAddress;
+ int shortcutFlags = makeAttributeFlags(shortcutIterator.hasNext(), offset,
+ target.mFrequency);
+ buffer[index++] = (byte)shortcutFlags;
+ final int shortcutShift = writeVariableAddress(buffer, index, Math.abs(offset));
+ index += shortcutShift;
+ groupAddress += shortcutShift;
+ }
+ }
// Write bigrams
if (null != group.mBigrams) {
- int remainingBigrams = group.mBigrams.size();
- for (WeightedString bigram : group.mBigrams) {
- boolean more = remainingBigrams > 1;
+ final Iterator bigramIterator = group.mBigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = (WeightedString)bigramIterator.next();
final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
++groupAddress;
final int offset = addressOfBigram - groupAddress;
- int bigramFlags = makeAttributeFlags(more, offset, bigram.mFrequency);
+ int bigramFlags = makeAttributeFlags(bigramIterator.hasNext(), offset,
+ bigram.mFrequency);
buffer[index++] = (byte)bigramFlags;
final int bigramShift = writeVariableAddress(buffer, index, Math.abs(offset));
index += bigramShift;
groupAddress += bigramShift;
- --remainingBigrams;
}
}
@@ -814,14 +860,43 @@ public class BinaryDictInputOutput {
childrenAddress = NO_CHILDREN_ADDRESS;
break;
}
+ ArrayList<PendingAttribute> shortcutTargets = null;
+ if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) {
+ shortcutTargets = new ArrayList<PendingAttribute>();
+ while (true) {
+ final int targetFlags = source.readUnsignedByte();
+ ++addressPointer;
+ final int sign = 0 == (targetFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
+ int targetAddress = addressPointer;
+ switch (targetFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ targetAddress += sign * source.readUnsignedByte();
+ addressPointer += 1;
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ targetAddress += sign * source.readUnsignedShort();
+ addressPointer += 2;
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ final int offset = ((source.readUnsignedByte() << 16)
+ + source.readUnsignedShort());
+ targetAddress += sign * offset;
+ addressPointer += 3;
+ break;
+ default:
+ throw new RuntimeException("Has attribute with no address");
+ }
+ shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY,
+ targetAddress));
+ if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
+ }
+ }
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- boolean more = true;
- while (more) {
- int bigramFlags = source.readUnsignedByte();
+ while (true) {
+ final int bigramFlags = source.readUnsignedByte();
++addressPointer;
- more = (0 != (bigramFlags & FLAG_ATTRIBUTE_HAS_NEXT));
final int sign = 0 == (bigramFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
int bigramAddress = addressPointer;
switch (bigramFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
@@ -844,10 +919,11 @@ public class BinaryDictInputOutput {
}
bigrams.add(new PendingAttribute(bigramFlags & FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
+ if (0 == (bigramFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
- childrenAddress, bigrams);
+ childrenAddress, shortcutTargets, bigrams);
}
/**
@@ -925,6 +1001,14 @@ public class BinaryDictInputOutput {
int groupOffset = nodeOrigin + 1; // 1 byte for the group count
for (int i = count; i > 0; --i) {
CharGroupInfo info = readCharGroup(source, groupOffset);
+ ArrayList<WeightedString> shortcutTargets = null;
+ if (null != info.mShortcutTargets) {
+ shortcutTargets = new ArrayList<WeightedString>();
+ for (PendingAttribute target : info.mShortcutTargets) {
+ final String word = getWordAtAddress(source, headerSize, target.mAddress);
+ shortcutTargets.add(new WeightedString(word, target.mFrequency));
+ }
+ }
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
@@ -942,11 +1026,11 @@ public class BinaryDictInputOutput {
source.seek(currentPosition);
}
nodeContents.add(
- new CharGroup(info.mCharacters, bigrams, info.mFrequency,
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
children));
} else {
nodeContents.add(
- new CharGroup(info.mCharacters, bigrams, info.mFrequency));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency));
}
groupOffset = info.mEndAddress;
}
@@ -996,7 +1080,7 @@ public class BinaryDictInputOutput {
new FusionDictionary.DictionaryOptions());
if (null != dict) {
for (Word w : dict) {
- newDict.add(w.mWord, w.mFrequency, w.mBigrams);
+ newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mBigrams);
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
index 6badfd13a..759cd452d 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
@@ -29,10 +29,12 @@ public class CharGroupInfo {
public final int[] mCharacters;
public final int mFrequency;
public final int mChildrenAddress;
+ public final ArrayList<PendingAttribute> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams;
public CharGroupInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final int frequency, final int childrenAddress,
+ final ArrayList<PendingAttribute> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) {
mOriginalAddress = originalAddress;
mEndAddress = endAddress;
@@ -40,6 +42,7 @@ public class CharGroupInfo {
mCharacters = characters;
mFrequency = frequency;
mChildrenAddress = childrenAddress;
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
index f6220eea2..50def5e62 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
@@ -68,7 +68,7 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
- * A group of characters, with a frequency, shortcuts, bigrams, and children.
+ * A group of characters, with a frequency, shortcut targets, bigrams, and children.
*
* This is the central class of the in-memory representation. A CharGroup is what can
* be seen as a traditional "trie node", except it can hold several characters at the
@@ -82,6 +82,7 @@ public class FusionDictionary implements Iterable<Word> {
public static class CharGroup {
public static final int NOT_A_TERMINAL = -1;
final int mChars[];
+ final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams;
final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren;
@@ -89,18 +90,20 @@ public class FusionDictionary implements Iterable<Word> {
int mCachedSize;
int mCachedAddress;
- public CharGroup(final int[] chars,
+ public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency) {
mChars = chars;
mFrequency = frequency;
+ mShortcutTargets = shortcutTargets; // Callers pass null when there are no shortcut targets.
mBigrams = bigrams;
mChildren = null; // This overload creates a group without children.
}
- public CharGroup(final int[] chars,
+ public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
mChars = chars;
mFrequency = frequency;
+ mShortcutTargets = shortcutTargets; // Callers pass null when there are no shortcut targets.
mBigrams = bigrams;
mChildren = children; // This overload attaches an existing child node.
}
@@ -165,18 +168,29 @@ public class FusionDictionary implements Iterable<Word> {
*
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
* @param bigrams a list of bigrams, or null.
*/
- public void add(String word, int frequency, ArrayList<WeightedString> bigrams) {
+ public void add(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams) {
+ if (null != shortcutTargets) { // Insert missing shortcut targets before the word itself.
+ for (WeightedString target : shortcutTargets) {
+ final CharGroup t = findWordInTree(mRoot, target.mWord);
+ if (null == t) { // Target not found in the tree.
+ add(getCodePoints(target.mWord), 0, null, null); // Frequency 0, no attributes.
+ }
+ }
+ }
if (null != bigrams) {
for (WeightedString bigram : bigrams) {
final CharGroup t = findWordInTree(mRoot, bigram.mWord);
if (null == t) {
- add(getCodePoints(bigram.mWord), 0, null);
+ add(getCodePoints(bigram.mWord), 0, null, null); // Same treatment as a missing target.
}
}
}
- add(getCodePoints(word), frequency, bigrams);
+ add(getCodePoints(word), frequency, shortcutTargets, bigrams); // Finally add the word itself.
}
/**
@@ -200,14 +214,17 @@ public class FusionDictionary implements Iterable<Word> {
/**
* Add a word to this dictionary.
*
- * The bigrams, if any, have to be in the dictionary already. If they aren't,
+ * The shortcut targets and bigrams, if any, have to be in the dictionary already. If they aren't,
* an exception is thrown.
*
* @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param bigrams an optional list of bigrams for this word (null if none).
*/
- private void add(int[] word, int frequency, ArrayList<WeightedString> bigrams) {
+ private void add(final int[] word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -231,7 +248,8 @@ public class FusionDictionary implements Iterable<Word> {
// No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
- Arrays.copyOfRange(word, charIndex, word.length), bigrams, frequency);
+ Arrays.copyOfRange(word, charIndex, word.length),
+ shortcutTargets, bigrams, frequency);
currentNode.mData.add(insertionIndex, newGroup);
checkStack(currentNode);
} else {
@@ -245,7 +263,7 @@ public class FusionDictionary implements Iterable<Word> {
+ new String(word, 0, word.length));
} else {
final CharGroup newNode = new CharGroup(currentGroup.mChars,
- bigrams, frequency, currentGroup.mChildren);
+ shortcutTargets, bigrams, frequency, currentGroup.mChildren);
currentNode.mData.set(nodeIndex, newNode);
checkStack(currentNode);
}
@@ -254,7 +272,7 @@ public class FusionDictionary implements Iterable<Word> {
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- bigrams, frequency);
+ shortcutTargets, bigrams, frequency);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
@@ -268,7 +286,8 @@ public class FusionDictionary implements Iterable<Word> {
+ new String(word, 0, word.length));
}
final CharGroup newGroup = new CharGroup(word,
- currentGroup.mBigrams, frequency, currentGroup.mChildren);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ frequency, currentGroup.mChildren);
currentNode.mData.set(nodeIndex, newGroup);
}
} else {
@@ -277,7 +296,7 @@ public class FusionDictionary implements Iterable<Word> {
Node newChildren = new Node();
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
- currentGroup.mChars.length),
+ currentGroup.mChars.length), currentGroup.mShortcutTargets,
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
newChildren.mData.add(newOldWord);
@@ -285,14 +304,14 @@ public class FusionDictionary implements Iterable<Word> {
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- bigrams, frequency, newChildren);
+ shortcutTargets, bigrams, frequency, newChildren);
} else {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- null, -1, newChildren);
+ null, null, -1, newChildren);
final CharGroup newWord = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex,
- word.length), bigrams, frequency);
+ word.length), shortcutTargets, bigrams, frequency);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@@ -355,7 +374,7 @@ public class FusionDictionary implements Iterable<Word> {
*/
private static int findInsertionIndex(final Node node, int character) {
final List data = node.mData;
- final CharGroup reference = new CharGroup(new int[] { character }, null, 0);
+ final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@@ -573,7 +592,7 @@ public class FusionDictionary implements Iterable<Word> {
}
if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
- currentGroup.mBigrams);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams);
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();
diff --git a/tools/makedict/src/com/android/inputmethod/latin/Word.java b/tools/makedict/src/com/android/inputmethod/latin/Word.java
index 916165a41..561b21bb3 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/Word.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/Word.java
@@ -28,11 +28,15 @@ import java.util.ArrayList;
public class Word implements Comparable<Word> {
final String mWord;
final int mFrequency;
+ final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams;
- public Word(String word, int frequency, ArrayList<WeightedString> bigrams) {
+ public Word(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams) {
mWord = word;
mFrequency = frequency;
+ mShortcutTargets = shortcutTargets; // NOTE(review): can be null; confirm equals() tolerates it.
mBigrams = bigrams;
}
@@ -60,6 +64,7 @@ public class Word implements Comparable<Word> {
if (!(o instanceof Word)) return false;
Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
+ && mShortcutTargets.equals(w.mShortcutTargets)
&& mBigrams.equals(w.mBigrams);
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
index 4720e9d10..19ed9d8d2 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
@@ -107,7 +107,8 @@ public class XmlDictInputOutput {
@Override
public void endElement(String uri, String localName, String qName) {
if (WORD == mState) {
- mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord));
+ // TODO: pass the shortcut targets
+ mDictionary.add(mWord, mFreq, null, mBigramsMap.get(mWord));
mState = START;
}
}
diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
index 79cf14b2b..6ac046bbf 100644
--- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
+++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
@@ -39,11 +39,11 @@ public class BinaryDictInputOutputTest extends TestCase {
// that it does not contain any duplicates.
public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary();
- dict.add("foo", 1, null);
- dict.add("fta", 1, null);
- dict.add("ftb", 1, null);
- dict.add("bar", 1, null);
- dict.add("fool", 1, null);
+ dict.add("foo", 1, null, null);
+ dict.add("fta", 1, null, null);
+ dict.add("ftb", 1, null, null);
+ dict.add("bar", 1, null, null);
+ dict.add("fool", 1, null, null);
final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
assertEquals(4, result.size());
while (!result.isEmpty()) {