diff options
16 files changed, 106 insertions, 22 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 34c190718..6ccfec911 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -46,23 +46,22 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/, \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ - shortcut/shortcut_list_reading_utils.cpp \ structure/dictionary_structure_with_buffer_policy_factory.cpp) \ - $(addprefix suggest/policyimpl/dictionary/bigram/, \ - bigram_list_read_write_utils.cpp \ - ver4_bigram_list_policy.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/pt_common/, \ + bigram/bigram_list_read_write_utils.cpp \ dynamic_pt_gc_event_listeners.cpp \ dynamic_pt_reading_helper.cpp \ dynamic_pt_reading_utils.cpp \ dynamic_pt_updating_helper.cpp \ dynamic_pt_writing_utils.cpp \ - patricia_trie_reading_utils.cpp) \ + patricia_trie_reading_utils.cpp \ + shortcut/shortcut_list_reading_utils.cpp ) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ ver2_patricia_trie_node_reader.cpp \ ver2_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ + bigram/ver4_bigram_list_policy.cpp \ ver4_dict_buffers.cpp \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp index 7d0d09631..08b4e0b5e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h index 15f924a6a..15f924a6a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp index d3eea84b1..91c76941c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h index d065bf7fd..d065bf7fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h index a898e2afc..00bb502dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h @@ -21,7 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 6959c4e15..54d1e0f6d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -22,9 +22,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h index 6d2b4778c..8e16ccc05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h @@ -21,7 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp index 9e584b416..7a52fd180 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" #include "suggest/core/dictionary/property/bigram_property.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h index 1613941c4..1613941c4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h index fe984615c..790273541 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h @@ -19,7 +19,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp index 67420a252..0a435e91c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -95,4 +95,4 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce } } -} +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index ae47c8cfc..f89d3d7a0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -17,13 +17,13 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/core/dictionary/property/unigram_property.h" -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -415,4 +415,4 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, return true; } -} +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 86c1c5b7f..b78576484 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -21,10 +21,10 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 12298d967..f31c50253 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -19,9 +19,9 @@ #include <cstring> #include <queue> -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 9ceafa705..770e76e5f 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -1245,6 +1245,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, "bbb", unigramProbability); final int bigramProbability = 10; addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); + final int shortcutProbability = 10; + binaryDictionary.addUnigramWord("ccc", unigramProbability, "xxx", shortcutProbability, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + binaryDictionary.addUnigramWord("ddd", unigramProbability, null /* shortcutTarget */, + Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */, + true /* isBlacklisted */, 0 /* timestamp */); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); @@ -1256,5 +1262,84 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); // TODO: Add tests for bigram frequency when the implementation gets ready. assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); + WordProperty wordProperty = binaryDictionary.getWordProperty("ccc"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); + wordProperty = binaryDictionary.getWordProperty("ddd"); + assertTrue(wordProperty.mIsBlacklistEntry); + assertTrue(wordProperty.mIsNotAWord); + } + + public void testLargeDictMigration() { + testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4); + } + + private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { + final int UNIGRAM_COUNT = 3000; + final int BIGRAM_COUNT = 3000; + final int codePointSetSize = 50; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final ArrayList<String> words = new ArrayList<String>(); + final ArrayList<Pair<String, String>> bigrams = new ArrayList<Pair<String,String>>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + unigramProbabilities.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(words.size()); + final int word1Index = random.nextInt(words.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + bigrams.add(bigram); + bigramProbabilities.put(bigram, bigramProbability); + } + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + + for (final String word : words) { + assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); + } + assertEquals(unigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + + for (final Pair<String, String> bigram : bigrams) { + // TODO: Add tests for bigram frequency when the implementation gets ready. + assertTrue(binaryDictionary.isValidBigram(bigram.first, bigram.second)); + } + assertEquals(bigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); } } |