aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-02-04 07:46:49 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2014-02-04 07:46:50 +0000
commitffb12e76b88c0e074a2316a10d7ec8857c4b2a39 (patch)
treef0703c1a03131b55f5a753e7ff96f31f92409a33
parent96aee22e7b63d5a0392615a7bc8f7f90e4a7ea19 (diff)
parent941734695b9eeb59135db737e4b153c45e88247a (diff)
downloadlatinime-ffb12e76b88c0e074a2316a10d7ec8857c4b2a39.tar.gz
latinime-ffb12e76b88c0e074a2316a10d7ec8857c4b2a39.tar.xz
latinime-ffb12e76b88c0e074a2316a10d7ec8857c4b2a39.zip
Merge "Implement Ver4PatriciaTriePolicy::getNextWordAndNextToken."
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h6
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java93
6 files changed, 148 insertions, 6 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 00eb57c9f..bdf89450f 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -357,7 +357,7 @@ public final class BinaryDictionary extends Dictionary {
while (len < MAX_WORD_LENGTH && codePoints[len] != 0) {
++len;
}
- final String word = new String(mOutputCodePoints, 0, len);
+ final String word = new String(codePoints, 0, len);
return new GetNextWordPropertyResult(getWordProperty(word), nextToken);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
index b918e0765..824d442e4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -28,6 +28,14 @@ const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 10000
const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
+ const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) {
+ mTerminalPositions->push_back(ptNodeParams->getHeadPos());
+ }
+ return true;
+}
+
// Visits all PtNodes in post-order depth first manner.
// For example, visits c -> b -> y -> x -> a for the following dictionary:
// a _ b _ c
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index a69490943..bcc5c7857 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -59,6 +59,21 @@ class DynamicPtReadingHelper {
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
};
+ class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener {
+ public:
+ TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions)
+ : mTerminalPositions(terminalPositions) {}
+ bool onAscend() { return true; }
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+ bool onReadingPtNodeArrayTail() { return true; }
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions);
+
+ std::vector<int> *const mTerminalPositions;
+ };
+
DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader)
: mIsError(false), mReadingState(), mBuffer(buffer),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 1c420e070..75d85988c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -392,10 +392,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
historicalInfo->getCount(), &bigrams, &shortcuts);
}
-int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token,
- int *const outCodePoints) {
- // TODO: Implement.
- return 0;
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ if (token == 0) {
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+ &mTerminalPtNodePositionsForIteratingWords);
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
+ }
+ const int terminalPtNodePositionsVectorSize =
+ static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+ if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
+ AKLOGE("Given token %d is invalid.", token);
+ return 0;
+ }
+ const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
+ int unigramProbability = NOT_A_PROBABILITY;
+ getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
+ outCodePoints, &unigramProbability);
+ const int nextToken = token + 1;
+ if (nextToken >= terminalPtNodePositionsVectorSize) {
+ // All words have been iterated.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ return 0;
+ }
+ return nextToken;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 1bcd4ceea..9ba5be0c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#include <vector>
+
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
@@ -50,7 +52,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()) {};
+ mBigramCount(mHeaderPolicy->getBigramCount()),
+ mTerminalPtNodePositionsForIteratingWords() {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -134,6 +137,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index e39b46f94..bab86e546 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -971,6 +971,99 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
}
+ public void testIterateAllWords() {
+ testIterateAllWords(FormatSpec.VERSION4);
+ }
+
+ private void testIterateAllWords(final int formatVersion) {
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+ final int UNIGRAM_COUNT = 1000;
+ final int BIGRAM_COUNT = 1000;
+ final int codePointSetSize = 20;
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
+ assertFalse(invalidWordProperty.isValid());
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashMap<String, Integer> wordProbabilitiesToCheckLater =
+ new HashMap<String, Integer>();
+ final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
+ new HashMap<Pair<String, String>, Integer>();
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ words.add(word);
+ wordProbabilitiesToCheckLater.put(word, unigramProbability);
+ }
+
+ for (int i = 0; i < BIGRAM_COUNT; i++) {
+ final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
+ final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
+ if (word0Index == word1Index) {
+ continue;
+ }
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int bigramProbability = random.nextInt(0xF);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ if (!bigrams.containsKey(word0)) {
+ final HashSet<String> bigramWord1s = new HashSet<String>();
+ bigrams.put(word0, bigramWord1s);
+ }
+ bigrams.get(word0).add(word1);
+ bigramProbabilitiesToCheckLater.put(
+ new Pair<String, String>(word0, word1), bigramProbability);
+ }
+
+ final HashSet<String> wordSet = new HashSet<String>(words);
+ final HashSet<Pair<String, String>> bigramSet =
+ new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet());
+ int token = 0;
+ do {
+ final BinaryDictionary.GetNextWordPropertyResult result =
+ binaryDictionary.getNextWordProperty(token);
+ final WordProperty wordProperty = result.mWordProperty;
+ final String word0 = wordProperty.mCodePoints;
+ assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
+ wordProperty.mProbabilityInfo.mProbability);
+ wordSet.remove(word0);
+ final HashSet<String> bigramWord1s = bigrams.get(word0);
+ for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
+ final String word1 = wordProperty.mBigramTargets.get(j).mWord;
+ assertTrue(bigramWord1s.contains(word1));
+ final int probability = wordProperty.mBigramTargets.get(j).mFrequency;
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
+ bigramSet.remove(bigram);
+ }
+ token = result.mNextToken;
+ } while (token != 0);
+ assertTrue(wordSet.isEmpty());
+ assertTrue(bigramSet.isEmpty());
+ }
+
public void testAddShortcuts() {
testAddShortcuts(FormatSpec.VERSION4);
}