about summary refs log tree commit diff stats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java | 10
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 28
2 files changed, 34 insertions, 4 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 88da7b0d8..d82d503c4 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -131,6 +131,7 @@ public class BinaryDictInputOutput {
// These options need to be the same numeric values as the one in the native reading code.
private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+ private static final int CONTAINS_BIGRAMS_FLAG = 0x8;
// TODO: Make this value adaptative to content data, store it in the header, and
// use it in the reading code.
@@ -752,9 +753,12 @@ public class BinaryDictInputOutput {
/**
* Makes the 2-byte value for options flags.
*/
- private static final int makeOptionsValue(final DictionaryOptions options) {
+ private static final int makeOptionsValue(final FusionDictionary dictionary) {
+ final DictionaryOptions options = dictionary.mOptions;
+ final boolean hasBigrams = dictionary.hasBigrams(); // recursive trie search, can be slow
return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
- + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0);
+ + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
+ + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0); // flags are distinct bits, so + acts as |
}
/**
@@ -970,7 +974,7 @@ public class BinaryDictInputOutput {
headerBuffer.write((byte) (0xFF & version));
}
// Options flags
- final int options = makeOptionsValue(dict.mOptions);
+ final int options = makeOptionsValue(dict);
headerBuffer.write((byte) (0xFF & (options >> 8)));
headerBuffer.write((byte) (0xFF & options));
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index c293b2ba4..b08702e47 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable<Word> {
* Recursively count the number of nodes in a given branch of the trie.
*
* @param node the node to count.
- * @result the number of nodes in this branch.
+ * @return the number of nodes in this branch.
*/
public static int countNodes(final Node node) {
int size = 1;
@@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable<Word> {
return size;
}
+ // Depth-first search below the given node for a char group whose mBigrams is non-null.
+ // Returns on the first hit, so this is cheap when bigrams exist, but every reachable
+ // char group is visited when there are none, which can be pretty expensive.
+ private static boolean hasBigramsInternal(final Node node) {
+ if (null == node) return false;
+ int index = node.mData.size();
+ while (--index >= 0) {
+ final CharGroup charGroup = node.mData.get(index);
+ if (null != charGroup.mBigrams || hasBigramsInternal(charGroup.mChildren)) return true;
+ }
+ return false;
+ }
+
+ /**
+ * Tells whether at least one bigram is stored anywhere in this dictionary.
+ *
+ * @return true if there is any bigram, false otherwise.
+ */
+ // TODO: this searches the trie from the root on every call, which is expensive for large
+ // dictionaries without any bigram. The upside is that the answer is always accurate and
+ // no extra state is kept. Find a cheaper way without giving up memory or ease of use.
+ public boolean hasBigrams() {
+ final boolean anyBigram = hasBigramsInternal(mRoot);
+ return anyBigram;
+ }
+
// Historically, the tails of the words were going to be merged to save space.
// However, that would prevent the code to search for a specific address in log(n)
// time so this was abandoned.