aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2013-11-13 07:59:28 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2013-11-13 07:59:28 +0000
commitcc356d04e7ff58de02face1ee5c54e43502593e8 (patch)
tree39cf673189018346e4720c7c017bd01cabcc88ac
parentb87ed46118c4bdf34c764a1e93c2cf0d4655f63b (diff)
parentff4b1d1fd06ccc804313d4c649fe00b43c76895e (diff)
downloadlatinime-cc356d04e7ff58de02face1ee5c54e43502593e8.tar.gz
latinime-cc356d04e7ff58de02face1ee5c54e43502593e8.tar.xz
latinime-cc356d04e7ff58de02face1ee5c54e43502593e8.zip
Merge "Implement ver4 dictionary bigram removing methods."
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp27
-rw-r--r--tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java52
8 files changed, 122 insertions, 7 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 028078111..94d7f1061 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -91,6 +91,33 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
}
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list does't exist.
+ return false;
+ }
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ int readingPos = entryPosToUpdate;
+ bool hasNext = false;
+ int probability = NOT_A_PROBABILITY;
+ int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
+ &originalTargetTerminalId, &readingPos);
+ if (targetTerminalId != originalTargetTerminalId) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ int writingPos = entryPosToUpdate;
+ // Remove bigram entry by overwriting target terminal Id.
+ return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
+ Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+}
+
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos) const {
bool hasNext = true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index e599a2c8d..b3fe13d7d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -42,6 +42,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
bool *const outAddedNewEntry);
+ bool removeEntry(const int terminalId, const int targetTerminalId);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index f02f14582..999460086 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -31,9 +31,12 @@ void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbabili
if (outHasNext) {
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
}
+ const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
if (outTargetTerminalId) {
- *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+ *outTargetTerminalId =
+ (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
+ Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
}
}
@@ -45,7 +48,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
return false;
}
- return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId,
+ const int targetTerminalIdToWrite =
+ (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+ Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
+ return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 2b113e38b..ced76d25d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -42,6 +42,10 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
+// invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index a1830dd0c..b00821373 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -47,6 +47,7 @@ class Ver4DictConstants {
static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
static const int BIGRAM_PROBABILITY_MASK;
static const int BIGRAM_HAS_NEXT_MASK;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 7c66a6691..b572ee87f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -192,8 +192,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- // TODO: Implement.
- return false;
+ return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId());
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 57201e4a8..40cc3d00b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -188,8 +188,31 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- // TODO: Implement.
- return false;
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer.getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ mBigramCount--;
+ return true;
+ } else {
+ return false;
+ }
}
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
index b7cef738b..ad57e4c9f 100644
--- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
@@ -246,4 +246,56 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
}
+
+ public void testRemoveBigramWords() {
+ final String dictVersion = Long.toString(System.currentTimeMillis());
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+ getDictionaryOptions(TEST_LOCALE, dictVersion));
+ final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
+ try {
+ encoder.writeDictionary(dict, FORMAT_OPTIONS);
+ } catch (IOException e) {
+ Log.e(TAG, "IOException while writing dictionary", e);
+ } catch (UnsupportedFormatException e) {
+ Log.e(TAG, "Unsupported format", e);
+ }
+ final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
+ 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ assertTrue(binaryDictionary.isValidDictionary());
+
+ final int unigramProbability = 100;
+ final int bigramProbability = 10;
+ binaryDictionary.addUnigramWord("aaa", unigramProbability);
+ binaryDictionary.addUnigramWord("abb", unigramProbability);
+ binaryDictionary.addUnigramWord("bcc", unigramProbability);
+ binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
+ binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+
+ assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
+ assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
+ assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
+ assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
+
+ binaryDictionary.removeBigramWords("aaa", "abb");
+ assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
+ binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
+ assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
+
+ binaryDictionary.removeBigramWords("aaa", "bcc");
+ assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
+ binaryDictionary.removeBigramWords("abb", "aaa");
+ assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
+ binaryDictionary.removeBigramWords("abb", "bcc");
+ assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
+
+ binaryDictionary.removeBigramWords("aaa", "abb");
+ // Test remove non-existing bigram operation.
+ binaryDictionary.removeBigramWords("aaa", "abb");
+ binaryDictionary.removeBigramWords("bcc", "aaa");
+ }
+
}