aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-02-14 17:56:45 +0900
committerKeisuke Kuroyanagi <ksk@google.com>2014-02-14 17:56:45 +0900
commitc63d183473390dbe6ddef37df48b36ae49de3f29 (patch)
treee74552ed594f773c33de74b1005d892b26e0b25d
parentfd9599e9c7f2dceb3ecc3ecd4058369d49a7483d (diff)
downloadlatinime-c63d183473390dbe6ddef37df48b36ae49de3f29.tar.gz
latinime-c63d183473390dbe6ddef37df48b36ae49de3f29.tar.xz
latinime-c63d183473390dbe6ddef37df48b36ae49de3f29.zip
Implement PatriciaTriePolicy::getWordProperty().
Bug: 12810574 Change-Id: I7bcccfd3641ebbcf2b8d857d33bb4734c42af5eb
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h5
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java36
3 files changed, 97 insertions, 4 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 8172e70b6..bf38dffa5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
return siblingPos;
}
+const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+ const int codePointCount) const {
+ const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+ false /* forceLowerCaseSearch */);
+ if (ptNodePos == NOT_A_DICT_POS) {
+ AKLOGE("getWordProperty was called for invalid word.");
+ return WordProperty();
+ }
+ const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+ ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+ // Fetch bigram information.
+ std::vector<WordProperty::BigramProperty> bigrams;
+ const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
+ while (bigramsIt.hasNext()) {
+ // Fetch the next bigram information and forward the iterator.
+ bigramsIt.next();
+ // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
+ if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
+ int word1Probability = NOT_A_PROBABILITY;
+ int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + word1CodePointCount);
+ bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(),
+ NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */));
+ }
+ }
+ // Fetch shortcut information.
+ std::vector<WordProperty::ShortcutProperty> shortcuts;
+ int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+ if (shortcutPos != NOT_A_DICT_POS) {
+ int shortcutTargetCodePoints[MAX_WORD_LENGTH];
+ ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
+ bool hasNext = true;
+ while (hasNext) {
+ const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
+ hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
+ const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
+ mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
+ std::vector<int> shortcutTarget(shortcutTargetCodePoints,
+ shortcutTargetCodePoints + shortcutTargetLength);
+ const int shortcutProbability =
+ ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
+ shortcuts.push_back(
+ WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability));
+ }
+ }
+ return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
+ ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
+ ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
+ NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
+ &bigrams, &shortcuts);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 1ce7f85d4..da4be87ce 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
const WordProperty getWordProperty(const int *const codePoints,
- const int codePointCount) const {
- // getWordProperty is not supported.
- return WordProperty();
- }
+ const int codePointCount) const;
int getNextWordAndNextToken(const int token, int *const outCodePoints) {
// getNextWordAndNextToken is not supported.
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index 75a1aa3ca..86f245810 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -39,6 +39,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
@@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
Log.d(TAG, result);
}
}
+
+ public void testVer2DictGetWordProperty() {
+ final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
+ final ArrayList<String> words = sWords;
+ final HashMap<String, List<String>> shortcuts = sShortcuts;
+ final String dictName = "testGetWordProperty";
+ final String dictVersion = Long.toString(System.currentTimeMillis());
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+ BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
+ addUnigrams(words.size(), dict, words, shortcuts);
+ addBigrams(dict, words, sEmptyBigrams);
+ final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
+ getContext().getCacheDir());
+ file.delete();
+ timeWritingDictToFile(file, dict, formatOptions);
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
+ 0 /* offset */, file.length(), true /* useFullEditDistance */,
+ Locale.ENGLISH, dictName, false /* isUpdatable */);
+ for (final String word : words) {
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals(word, wordProperty.mWord);
+ assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
+ if (shortcuts.containsKey(word)) {
+ assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
+ final List<String> shortcutList = shortcuts.get(word);
+ assertTrue(wordProperty.mHasShortcuts);
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ assertTrue(shortcutList.contains(shortcutTarget.mWord));
+ assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
+ shortcutList.remove(shortcutTarget.mWord);
+ }
+ assertTrue(shortcutList.isEmpty());
+ }
+ }
+ }
}