aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java147
1 files changed, 146 insertions, 1 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index fdf2ae7b5..3bb218bea 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -303,9 +303,14 @@ public final class FusionDictionary implements Iterable<Word> {
* Options global to the dictionary.
*/
public static final class DictionaryOptions {
+ public final boolean mGermanUmlautProcessing;
+ public final boolean mFrenchLigatureProcessing;
public final HashMap<String, String> mAttributes;
- public DictionaryOptions(final HashMap<String, String> attributes) {
+ public DictionaryOptions(final HashMap<String, String> attributes,
+ final boolean germanUmlautProcessing, final boolean frenchLigatureProcessing) {
mAttributes = attributes;
+ mGermanUmlautProcessing = germanUmlautProcessing;
+ mFrenchLigatureProcessing = frenchLigatureProcessing;
}
@Override
public String toString() { // Convenience method
@@ -334,6 +339,14 @@ public final class FusionDictionary implements Iterable<Word> {
}
s.append("\n");
}
+ if (mGermanUmlautProcessing) {
+ s.append(indent);
+ s.append("Needs German umlaut processing\n");
+ }
+ if (mFrenchLigatureProcessing) {
+ s.append(indent);
+ s.append("Needs French ligature processing\n");
+ }
return s.toString();
}
}
@@ -688,6 +701,138 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
+ * Recursively count the number of nodes in a given branch of the trie.
+ *
+ * @param nodeArray the node array to count.
+ * @return the number of nodes in this branch.
+ */
+ public static int countNodeArrays(final PtNodeArray nodeArray) {
+ int size = 1;
+ for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
+ PtNode ptNode = nodeArray.mData.get(i);
+ if (null != ptNode.mChildren)
+ size += countNodeArrays(ptNode.mChildren);
+ }
+ return size;
+ }
+
+ // Recursively find out whether there are any bigrams.
+ // This can be pretty expensive especially if there aren't any (we return as soon
+ // as we find one, so it's much cheaper if there are bigrams)
+ private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
+ if (null == nodeArray) return false;
+ for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
+ PtNode ptNode = nodeArray.mData.get(i);
+ if (null != ptNode.mBigrams) return true;
+ if (hasBigramsInternal(ptNode.mChildren)) return true;
+ }
+ return false;
+ }
+
+ /**
+ * Finds out whether there are any bigrams in this dictionary.
+ *
+ * @return true if there is any bigram, false otherwise.
+ */
+ // TODO: this is expensive especially for large dictionaries without any bigram.
+ // The up side is, this is always accurate and correct and uses no memory. We should
+ // find a more efficient way of doing this, without compromising too much on memory
+ // and ease of use.
+ public boolean hasBigrams() {
+ return hasBigramsInternal(mRootNodeArray);
+ }
+
+ // Historically, the tails of the words were going to be merged to save space.
+ // However, that would prevent the code to search for a specific address in log(n)
+ // time so this was abandoned.
+ // The code is still of interest as it does add some compression to any dictionary
+ // that has no need for attributes. Implementations that does not read attributes should be
+ // able to read a dictionary with merged tails.
+ // Also, the following code does support frequencies, as in, it will only merges
+ // tails that share the same frequency. Though it would result in the above loss of
+ // performance while searching by address, it is still technically possible to merge
+ // tails that contain attributes, but this code does not take that into account - it does
+ // not compare attributes and will merge terminals with different attributes regardless.
+ public void mergeTails() {
+ MakedictLog.i("Do not merge tails");
+ return;
+
+// MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root));
+// MakedictLog.i("Number of PtNodes : " + countPtNodes(root));
+//
+// final HashMap<String, ArrayList<PtNodeArray>> repository =
+// new HashMap<String, ArrayList<PtNodeArray>>();
+// mergeTailsInner(repository, root);
+//
+// MakedictLog.i("Number of different pseudohashes : " + repository.size());
+// int size = 0;
+// for (ArrayList<PtNodeArray> a : repository.values()) {
+// size += a.size();
+// }
+// MakedictLog.i("Number of nodes after merge : " + (1 + size));
+// MakedictLog.i("Recursively seen nodes : " + countNodes(root));
+ }
+
+ // The following methods are used by the deactivated mergeTails()
+// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
+// if (null == a && null == b) return true;
+// if (null == a || null == b) return false;
+// if (a.data.size() != b.data.size()) return false;
+// final int size = a.data.size();
+// for (int i = size - 1; i >= 0; --i) {
+// PtNode aPtNode = a.data.get(i);
+// PtNode bPtNode = b.data.get(i);
+// if (aPtNode.frequency != bPtNode.frequency) return false;
+// if (aPtNode.alternates == null && bPtNode.alternates != null) return false;
+// if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false;
+// if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false;
+// if (!isEqual(aPtNode.children, bPtNode.children)) return false;
+// }
+// return true;
+// }
+
+// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
+// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
+// final ArrayList<PtNode> branches = nodeArray.data;
+// final int nodeSize = branches.size();
+// for (int i = 0; i < nodeSize; ++i) {
+// PtNode ptNode = branches.get(i);
+// if (null != ptNode.children) {
+// String pseudoHash = getPseudoHash(ptNode.children);
+// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
+// if (null == similarList) {
+// similarList = new ArrayList<PtNodeArray>();
+// map.put(pseudoHash, similarList);
+// }
+// boolean merged = false;
+// for (PtNodeArray similar : similarList) {
+// if (isEqual(ptNode.children, similar)) {
+// ptNode.children = similar;
+// merged = true;
+// break;
+// }
+// }
+// if (!merged) {
+// similarList.add(ptNode.children);
+// }
+// mergeTailsInner(map, ptNode.children);
+// }
+// }
+// return map;
+// }
+
+// private static String getPseudoHash(final PtNodeArray nodeArray) {
+// StringBuilder s = new StringBuilder();
+// for (PtNode ptNode : nodeArray.data) {
+// s.append(ptNode.frequency);
+// for (int ch : ptNode.chars) {
+// s.append(Character.toChars(ch));
+// }
+// }
+// return s.toString();
+// }
+
+ /**
* Iterator to walk through a dictionary.
*
* This is purely for convenience.