1 files changed, 146 insertions, 1 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index fdf2ae7b5..3bb218bea 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -303,9 +303,14 @@ public final class FusionDictionary implements Iterable<Word> {
      * Options global to the dictionary.
      */
     public static final class DictionaryOptions {
+        public final boolean mGermanUmlautProcessing;
+        public final boolean mFrenchLigatureProcessing;
         public final HashMap<String, String> mAttributes;
-        public DictionaryOptions(final HashMap<String, String> attributes) {
+        public DictionaryOptions(final HashMap<String, String> attributes,
+                final boolean germanUmlautProcessing, final boolean frenchLigatureProcessing) {
             mAttributes = attributes;
+            mGermanUmlautProcessing = germanUmlautProcessing;
+            mFrenchLigatureProcessing = frenchLigatureProcessing;
         }
         @Override
         public String toString() { // Convenience method
@@ -334,6 +339,14 @@ public final class FusionDictionary implements Iterable<Word> {
                 }
                 s.append("\n");
             }
+            if (mGermanUmlautProcessing) {
+                s.append(indent);
+                s.append("Needs German umlaut processing\n");
+            }
+            if (mFrenchLigatureProcessing) {
+                s.append(indent);
+                s.append("Needs French ligature processing\n");
+            }
             return s.toString();
         }
     }
@@ -688,6 +701,138 @@ public final class FusionDictionary implements Iterable<Word> {
     }
 
     /**
+     * Recursively count the number of nodes in a given branch of the trie.
+     *
+     * @param nodeArray the node array to count.
+     * @return the number of nodes in this branch.
+     */
+    public static int countNodeArrays(final PtNodeArray nodeArray) {
+        int size = 1;
+        for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
+            PtNode ptNode = nodeArray.mData.get(i);
+            if (null != ptNode.mChildren)
+                size += countNodeArrays(ptNode.mChildren);
+        }
+        return size;
+    }
+
+    // Recursively find out whether there are any bigrams.
+    // This can be pretty expensive especially if there aren't any (we return as soon
+    // as we find one, so it's much cheaper if there are bigrams)
+    private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
+        if (null == nodeArray) return false;
+        for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
+            PtNode ptNode = nodeArray.mData.get(i);
+            if (null != ptNode.mBigrams) return true;
+            if (hasBigramsInternal(ptNode.mChildren)) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Finds out whether there are any bigrams in this dictionary.
+     *
+     * @return true if there is any bigram, false otherwise.
+     */
+    // TODO: this is expensive especially for large dictionaries without any bigram.
+    // The up side is, this is always accurate and correct and uses no memory. We should
+    // find a more efficient way of doing this, without compromising too much on memory
+    // and ease of use.
+    public boolean hasBigrams() {
+        return hasBigramsInternal(mRootNodeArray);
+    }
+
+    // Historically, the tails of the words were going to be merged to save space.
+    // However, that would prevent the code to search for a specific address in log(n)
+    // time so this was abandoned.
+    // The code is still of interest as it does add some compression to any dictionary
+    // that has no need for attributes. Implementations that does not read attributes should be
+    // able to read a dictionary with merged tails.
+    // Also, the following code does support frequencies, as in, it will only merges
+    // tails that share the same frequency. Though it would result in the above loss of
+    // performance while searching by address, it is still technically possible to merge
+    // tails that contain attributes, but this code does not take that into account - it does
+    // not compare attributes and will merge terminals with different attributes regardless.
+    public void mergeTails() {
+        MakedictLog.i("Do not merge tails");
+        return;
+
+//        MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root));
+//        MakedictLog.i("Number of PtNodes : " + countPtNodes(root));
+//
+//        final HashMap<String, ArrayList<PtNodeArray>> repository =
+//                  new HashMap<String, ArrayList<PtNodeArray>>();
+//        mergeTailsInner(repository, root);
+//
+//        MakedictLog.i("Number of different pseudohashes : " + repository.size());
+//        int size = 0;
+//        for (ArrayList<PtNodeArray> a : repository.values()) {
+//            size += a.size();
+//        }
+//        MakedictLog.i("Number of nodes after merge : " + (1 + size));
+//        MakedictLog.i("Recursively seen nodes : " + countNodes(root));
+    }
+
+    // The following methods are used by the deactivated mergeTails()
+//   private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
+//       if (null == a && null == b) return true;
+//       if (null == a || null == b) return false;
+//       if (a.data.size() != b.data.size()) return false;
+//       final int size = a.data.size();
+//       for (int i = size - 1; i >= 0; --i) {
+//           PtNode aPtNode = a.data.get(i);
+//           PtNode bPtNode = b.data.get(i);
+//           if (aPtNode.frequency != bPtNode.frequency) return false;
+//           if (aPtNode.alternates == null && bPtNode.alternates != null) return false;
+//           if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false;
+//           if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false;
+//           if (!isEqual(aPtNode.children, bPtNode.children)) return false;
+//       }
+//       return true;
+//   }
+
+//   static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
+//           final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
+//       final ArrayList<PtNode> branches = nodeArray.data;
+//       final int nodeSize = branches.size();
+//       for (int i = 0; i < nodeSize; ++i) {
+//           PtNode ptNode = branches.get(i);
+//           if (null != ptNode.children) {
+//               String pseudoHash = getPseudoHash(ptNode.children);
+//               ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
+//               if (null == similarList) {
+//                   similarList = new ArrayList<PtNodeArray>();
+//                   map.put(pseudoHash, similarList);
+//               }
+//               boolean merged = false;
+//               for (PtNodeArray similar : similarList) {
+//                   if (isEqual(ptNode.children, similar)) {
+//                       ptNode.children = similar;
+//                       merged = true;
+//                       break;
+//                   }
+//               }
+//               if (!merged) {
+//                   similarList.add(ptNode.children);
+//               }
+//               mergeTailsInner(map, ptNode.children);
+//           }
+//       }
+//       return map;
+//   }
+
+//  private static String getPseudoHash(final PtNodeArray nodeArray) {
+//      StringBuilder s = new StringBuilder();
+//      for (PtNode ptNode : nodeArray.data) {
+//          s.append(ptNode.frequency);
+//          for (int ch : ptNode.chars) {
+//              s.append(Character.toChars(ch));
+//          }
+//      }
+//      return s.toString();
+//  }
+
+    /**
      * Iterator to walk through a dictionary.
      *
      * This is purely for convenience.