aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/makedict/Android.mk5
-rw-r--r--tools/makedict/etc/Android.mk5
-rwxr-xr-xtools/makedict/etc/makedict4
-rw-r--r--tools/makedict/etc/manifest.txt2
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/BinaryDictInputOutput.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/CharGroupInfo.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java)2
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/FusionDictionary.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/MakedictLog.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/MakedictLog.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/PendingAttribute.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/PendingAttribute.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/UnsupportedFormatException.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/UnsupportedFormatException.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/Word.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/Word.java)0
-rw-r--r--tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java (renamed from tools/makedict2/src/com/android/inputmethod/latin/XmlDictInputOutput.java)0
-rw-r--r--tools/makedict/src/com/android/tools/dict/BigramDictionary.java286
-rw-r--r--tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java445
-rw-r--r--tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java (renamed from tools/makedict2/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java)0
-rw-r--r--tools/makedict2/Android.mk27
-rw-r--r--tools/makedict2/etc/Android.mk21
-rwxr-xr-xtools/makedict2/etc/makedict263
-rw-r--r--tools/makedict2/etc/manifest.txt1
20 files changed, 11 insertions, 850 deletions
diff --git a/tools/makedict/Android.mk b/tools/makedict/Android.mk
index b9fc5533d..6832b1cb6 100644
--- a/tools/makedict/Android.mk
+++ b/tools/makedict/Android.mk
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2009 The Android Open Source Project
+# Copyright (C) 2011 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,8 +17,11 @@ LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(call all-java-files-under,src)
+LOCAL_SRC_FILES += $(call all-java-files-under,tests)
LOCAL_JAR_MANIFEST := etc/manifest.txt
+LOCAL_MODULE_TAGS := eng
LOCAL_MODULE := makedict
+LOCAL_JAVA_LIBRARIES := junit
include $(BUILD_HOST_JAVA_LIBRARY)
include $(LOCAL_PATH)/etc/Android.mk
diff --git a/tools/makedict/etc/Android.mk b/tools/makedict/etc/Android.mk
index da162868a..96a90cbe0 100644
--- a/tools/makedict/etc/Android.mk
+++ b/tools/makedict/etc/Android.mk
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 The Android Open Source Project
+# Copyright (C) 2011 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
+LOCAL_MODULE_TAGS := eng
+
LOCAL_PREBUILT_EXECUTABLES := makedict
include $(BUILD_HOST_PREBUILT)
-
diff --git a/tools/makedict/etc/makedict b/tools/makedict/etc/makedict
index 8420d6e5e..7c1c02e85 100755
--- a/tools/makedict/etc/makedict
+++ b/tools/makedict/etc/makedict
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright 2009, The Android Open Source Project
+# Copyright 2011, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -60,4 +60,4 @@ fi
# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored
# might need more memory, e.g. -Xmx128M
-exec java -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@"
+exec java -ea -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@"
diff --git a/tools/makedict/etc/manifest.txt b/tools/makedict/etc/manifest.txt
index aa3a3e84c..948609da6 100644
--- a/tools/makedict/etc/manifest.txt
+++ b/tools/makedict/etc/manifest.txt
@@ -1 +1 @@
-Main-Class: com.android.tools.dict.MakeBinaryDictionary
+Main-Class: com.android.inputmethod.latin.DictionaryMaker
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index 92f402d3e..92f402d3e 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/CharGroupInfo.java b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
index 6badfd13a..6badfd13a 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/CharGroupInfo.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java
index 61944545a..1ba01075e 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java
@@ -71,7 +71,7 @@ public class DictionaryMaker {
}
private void displayHelp() {
- MakedictLog.i("Usage: makedict2 "
+ MakedictLog.i("Usage: makedict "
+ "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
+ " [-d <binary output>] [-x <xml output>] [-2]\n"
+ "\n"
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
index 031f35dfe..031f35dfe 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/MakedictLog.java b/tools/makedict/src/com/android/inputmethod/latin/MakedictLog.java
index badb2ff3d..badb2ff3d 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/MakedictLog.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/MakedictLog.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/PendingAttribute.java b/tools/makedict/src/com/android/inputmethod/latin/PendingAttribute.java
index e50202184..e50202184 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/PendingAttribute.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/PendingAttribute.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/UnsupportedFormatException.java b/tools/makedict/src/com/android/inputmethod/latin/UnsupportedFormatException.java
index 511ad5675..511ad5675 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/UnsupportedFormatException.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/UnsupportedFormatException.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/Word.java b/tools/makedict/src/com/android/inputmethod/latin/Word.java
index 916165a41..916165a41 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/Word.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/Word.java
diff --git a/tools/makedict2/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
index 096bfd182..096bfd182 100644
--- a/tools/makedict2/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
diff --git a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java b/tools/makedict/src/com/android/tools/dict/BigramDictionary.java
deleted file mode 100644
index 35115bf2c..000000000
--- a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.tools.dict;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-/**
- * Helper for MakeBinaryDictionary
- * Deals with all the bigram data
- */
-public class BigramDictionary {
-
- /*
- * Must match the values in the client side which is located in dictionary.cpp & dictionary.h
- * Changing these values will generate totally different structure which must be also reflected
- * on the client side.
- */
- public static final int FLAG_BIGRAM_READ = 0x80;
- public static final int FLAG_BIGRAM_CHILDEXIST = 0x40;
- public static final int FLAG_BIGRAM_CONTINUED = 0x80;
- public static final int FLAG_BIGRAM_FREQ = 0x7F;
-
- public static final int FOR_REVERSE_LOOKUPALL = -99;
-
- public ArrayList<String> mBigramToFill = new ArrayList<String>();
- public ArrayList<Integer> mBigramToFillAddress = new ArrayList<Integer>();
-
- public HashMap<String, Bigram> mBi;
-
- public boolean mHasBigram;
-
- public BigramDictionary(String bigramSrcFilename, boolean hasBigram) {
- mHasBigram = hasBigram;
- loadBigram(bigramSrcFilename);
- }
-
- private void loadBigram(String filename) {
- mBi = new HashMap<String, Bigram>();
- if (!mHasBigram) {
- System.out.println("Number of bigrams = " + Bigram.sBigramNum);
- return;
- }
- try {
- SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
- parser.parse(new File(filename), new DefaultHandler() {
- String w1 = null;
- boolean inWord1 = false;
- boolean inWord2 = false;
- int freq = 0, counter = 0;
- Bigram tempBigram = null;
-
- @Override
- public void startElement(String uri, String localName,
- String qName, Attributes attributes) {
- if (qName.equals("bi")) {
- inWord1 = true;
- w1 = attributes.getValue(0);
- int count = Integer.parseInt(attributes.getValue(1));
- tempBigram = new Bigram(count);
- counter = 0;
- } else if (qName.equals("w")) {
- inWord2 = true;
- String word2 = attributes.getValue(0);
- int freq = Integer.parseInt(attributes.getValue(1));
- tempBigram.setWord2(counter, word2, freq);
- counter++;
- Bigram.sBigramNum++;
- }
- }
-
- @Override
- public void endElement(String uri, String localName,
- String qName) {
- if (inWord2) {
- inWord2 = false;
- } else if (inWord1) {
- inWord1 = false;
- mBi.put(w1, tempBigram);
- }
- }
- });
- } catch (Exception ioe) {
- System.err.println("Exception in parsing bigram\n" + ioe);
- ioe.printStackTrace();
- }
- System.out.println("Number of bigrams = " + Bigram.sBigramNum);
- }
-
- byte[] writeBigrams(byte[] dict, Map<String, Integer> mDictionary) {
- for (int i = 0; i < mBigramToFill.size(); i++) {
- String w1 = mBigramToFill.get(i);
- int address = mBigramToFillAddress.get(i);
-
- Bigram temp = mBi.get(w1);
- int word2Count = temp.count;
- int j4;
- for (int j = 0; j < word2Count; j++) {
- if (!mDictionary.containsKey(temp.word2[j])) {
- System.out.println("Not in dictionary: " + temp.word2[j]);
- System.exit(0);
- } else {
- j4 = (j * 4);
- int addressOfWord2 = mDictionary.get(temp.word2[j]);
- dict[address + j4 + 0] = (byte) (((addressOfWord2 & 0x3F0000) >> 16)
- | FLAG_BIGRAM_READ);
- dict[address + j4 + 1] = (byte) ((addressOfWord2 & 0x00FF00) >> 8);
- dict[address + j4 + 2] = (byte) ((addressOfWord2 & 0x0000FF));
-
- if (j == (word2Count - 1)) {
- dict[address + j4 + 3] = (byte) (temp.freq[j] & FLAG_BIGRAM_FREQ);
- } else {
- dict[address + j4 + 3] = (byte) ((temp.freq[j] & FLAG_BIGRAM_FREQ)
- | FLAG_BIGRAM_CONTINUED);
- }
- }
- }
- }
-
- return dict;
- }
-
- void reverseLookupAll(Map<String, Integer> mDictionary, byte[] dict) {
- Set<String> st = mDictionary.keySet();
- for (String s : st) {
- searchForTerminalNode(mDictionary.get(s), FOR_REVERSE_LOOKUPALL, dict);
- }
- }
-
- void searchForTerminalNode(int bigramAddress, int frequency, byte[] dict) {
- StringBuilder sb = new StringBuilder(48);
- int pos;
- boolean found = false;
- int followDownBranchAddress = 2;
- char followingChar = ' ';
- int depth = 0;
- int totalLoopCount = 0;
-
- while (!found) {
- boolean followDownAddressSearchStop = false;
- boolean firstAddress = true;
- boolean haveToSearchAll = true;
-
- if (depth > 0) {
- sb.append(followingChar);
- }
- pos = followDownBranchAddress; // pos start at count
- int count = dict[pos] & 0xFF;
- pos++;
- for (int i = 0; i < count; i++) {
- totalLoopCount++;
- // pos at data
- pos++;
- // pos now at flag
- if (!MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // non-terminal
- if (!followDownAddressSearchStop) {
- int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
- if (addr > bigramAddress) {
- followDownAddressSearchStop = true;
- if (firstAddress) {
- firstAddress = false;
- haveToSearchAll = true;
- } else if (!haveToSearchAll) {
- break;
- }
- } else {
- followDownBranchAddress = addr;
- followingChar = (char) (0xFF & dict[pos-1]);
- if(firstAddress) {
- firstAddress = false;
- haveToSearchAll = false;
- }
- }
- }
- pos += 3;
- } else if (MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // terminal
- // found !!
- if (bigramAddress == (pos-1)) {
- sb.append((char) (0xFF & dict[pos-1]));
- found = true;
- break;
- }
-
- // address + freq (4 byte)
- if (MakeBinaryDictionary.getSecondBitOfByte(pos, dict)) {
- if (!followDownAddressSearchStop) {
- int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
- if (addr > bigramAddress) {
- followDownAddressSearchStop = true;
- if (firstAddress) {
- firstAddress = false;
- haveToSearchAll = true;
- } else if (!haveToSearchAll) {
- break;
- }
- } else {
- followDownBranchAddress = addr;
- followingChar = (char) (0xFF & dict[pos-1]);
- if(firstAddress) {
- firstAddress = false;
- haveToSearchAll = true;
- }
- }
- }
- pos += 4;
- } else { // freq only (2 byte)
- pos += 2;
- }
- // skipping bigram
- int bigramExist = (dict[pos] & FLAG_BIGRAM_READ);
- if (bigramExist > 0) {
- int nextBigramExist = 1;
- while (nextBigramExist > 0) {
- pos += 3;
- nextBigramExist = (dict[pos++] & FLAG_BIGRAM_CONTINUED);
- }
- } else {
- pos++;
- }
- }
- }
- depth++;
- if (followDownBranchAddress == 2) {
- System.out.println("ERROR!!! Cannot find bigram!!");
- System.exit(0);
- }
- }
-
- if (frequency == FOR_REVERSE_LOOKUPALL) {
- System.out.println("Reverse: " + sb.toString() + " (" + bigramAddress + ")"
- + " Loop: " + totalLoopCount);
- } else {
- System.out.println(" bigram: " + sb.toString() + " (" + bigramAddress + ") freq: "
- + frequency + " Loop: " + totalLoopCount);
- }
- }
-
- static class Bigram {
- String[] word2;
- int[] freq;
- int count;
- static int sBigramNum = 0;
-
- String getSecondWord(int i) {
- return word2[i];
- }
-
- int getFrequency(int i) {
- return (freq[i] == 0) ? 1 : freq[i];
- }
-
- void setWord2(int index, String word2, int freq) {
- this.word2[index] = word2;
- this.freq[index] = freq;
- }
-
- public Bigram(int word2Count) {
- count = word2Count;
- word2 = new String[word2Count];
- freq = new int[word2Count];
- }
- }
-}
diff --git a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java b/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java
deleted file mode 100644
index 4a285ff07..000000000
--- a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.tools.dict;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-/**
- * Compresses a list of words, frequencies, and bigram data
- * into a tree structured binary dictionary.
- * Dictionary Version: 200 (may contain bigrams)
- * Version number started from 200 rather than 1 because we wanted to prevent number of roots in
- * any old dictionaries being mistaken as the version number. There is not a chance that there
- * will be more than 200 roots. Version number should be increased when there is structural change
- * in the data. There is no need to increase the version when only the words in the data changes.
- */
-public class MakeBinaryDictionary {
- private static final int VERSION_NUM = 200;
-
- private static final String TAG_WORD = "w";
- private static final String ATTR_FREQ = "f";
-
- private static final int FLAG_ADDRESS_MASK = 0x400000;
- private static final int FLAG_TERMINAL_MASK = 0x800000;
- private static final int ADDRESS_MASK = 0x3FFFFF;
-
- private static final int INITIAL_STRING_BUILDER_CAPACITY = 48;
-
- /**
- * Unit for this variable is in bytes
- * If destination file name is main.dict and file limit causes dictionary to be separated into
- * multiple file, it will generate main0.dict, main1.dict, and so forth.
- */
- private static int sOutputFileSize;
- private static boolean sSplitOutput;
-
- private static final CharNode EMPTY_NODE = new CharNode();
-
- private List<CharNode> mRoots;
- private Map<String, Integer> mDictionary;
- private int mWordCount;
-
- private BigramDictionary mBigramDict;
-
- private static class CharNode {
- char data;
- int freq;
- boolean terminal;
- List<CharNode> children;
- static int sNodes;
-
- public CharNode() {
- sNodes++;
- }
- }
-
- private static void usage() {
- System.err.println("Usage: makedict -s <src_dict.xml> [-b <src_bigram.xml>] "
- + "-d <dest.dict> [--size filesize]");
- System.exit(-1);
- }
-
- public static void main(String[] args) {
- int checkSource = -1;
- int checkBigram = -1;
- int checkDest = -1;
- int checkFileSize = -1;
- for (int i = 0; i < args.length; i+=2) {
- if (args[i].equals("-s")) checkSource = (i + 1);
- if (args[i].equals("-b")) checkBigram = (i + 1);
- if (args[i].equals("-d")) checkDest = (i + 1);
- if (args[i].equals("--size")) checkFileSize = (i + 1);
- }
- if (checkFileSize >= 0) {
- sSplitOutput = true;
- sOutputFileSize = Integer.parseInt(args[checkFileSize]);
- } else {
- sSplitOutput = false;
- }
- if (checkDest >= 0 && !args[checkDest].endsWith(".dict")) {
- System.err.println("Error: Dictionary output file extension should be \".dict\"");
- usage();
- } else if (checkSource >= 0 && checkBigram >= 0 && checkDest >= 0 &&
- ((!sSplitOutput && args.length == 6) || (sSplitOutput && args.length == 8))) {
- new MakeBinaryDictionary(args[checkSource], args[checkBigram], args[checkDest]);
- } else if (checkSource >= 0 && checkDest >= 0 &&
- ((!sSplitOutput && args.length == 4) || (sSplitOutput && args.length == 6))) {
- new MakeBinaryDictionary(args[checkSource], null, args[checkDest]);
- } else {
- usage();
- }
- }
-
- private MakeBinaryDictionary(String srcFilename, String bigramSrcFilename,
- String destFilename) {
- System.out.println("Generating dictionary version " + VERSION_NUM);
- mBigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null));
- populateDictionary(srcFilename);
- writeToDict(destFilename);
-
- // Enable the code below to verify that the generated tree is traversable
- // and bigram data is stored correctly.
- if (false) {
- mBigramDict.reverseLookupAll(mDictionary, mDict);
- traverseDict(2, new char[32], 0);
- }
- }
-
- private void populateDictionary(String filename) {
- mRoots = new ArrayList<CharNode>();
- mDictionary = new HashMap<String, Integer>();
- try {
- SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
- parser.parse(new File(filename), new DefaultHandler() {
- boolean inWord;
- int freq;
- StringBuilder wordBuilder = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY);
-
- @Override
- public void startElement(String uri, String localName,
- String qName, Attributes attributes) {
- if (qName.equals(TAG_WORD)) {
- inWord = true;
- freq = Integer.parseInt(attributes.getValue(ATTR_FREQ));
- wordBuilder.setLength(0);
- }
- }
-
- @Override
- public void characters(char[] data, int offset, int length) {
- // Ignore other whitespace
- if (!inWord) return;
- wordBuilder.append(data, offset, length);
- }
-
- @Override
- public void endElement(String uri, String localName,
- String qName) {
- if (qName.equals(TAG_WORD)) {
- if (wordBuilder.length() >= 1) {
- addWordTop(wordBuilder.toString(), freq);
- mWordCount++;
- }
- inWord = false;
- }
- }
- });
- } catch (Exception ioe) {
- System.err.println("Exception in parsing\n" + ioe);
- ioe.printStackTrace();
- }
- System.out.println("Nodes = " + CharNode.sNodes);
- }
-
- private static int indexOf(List<CharNode> children, char c) {
- if (children == null) {
- return -1;
- }
- for (int i = 0; i < children.size(); i++) {
- if (children.get(i).data == c) {
- return i;
- }
- }
- return -1;
- }
-
- private void addWordTop(String word, int freq) {
- if (freq < 0) {
- freq = 0;
- } else if (freq > 255) {
- freq = 255;
- }
- char firstChar = word.charAt(0);
- int index = indexOf(mRoots, firstChar);
- if (index == -1) {
- CharNode newNode = new CharNode();
- newNode.data = firstChar;
- index = mRoots.size();
- mRoots.add(newNode);
- }
- final CharNode node = mRoots.get(index);
- if (word.length() > 1) {
- addWordRec(node, word, 1, freq);
- } else {
- node.terminal = true;
- node.freq = freq;
- }
- }
-
- private void addWordRec(CharNode parent, String word, int charAt, int freq) {
- CharNode child = null;
- char data = word.charAt(charAt);
- if (parent.children == null) {
- parent.children = new ArrayList<CharNode>();
- } else {
- for (int i = 0; i < parent.children.size(); i++) {
- CharNode node = parent.children.get(i);
- if (node.data == data) {
- child = node;
- break;
- }
- }
- }
- if (child == null) {
- child = new CharNode();
- parent.children.add(child);
- }
- child.data = data;
- if (word.length() > charAt + 1) {
- addWordRec(child, word, charAt + 1, freq);
- } else {
- child.terminal = true;
- child.freq = freq;
- }
- }
-
- private byte[] mDict;
- private int mDictSize;
- private static final int CHAR_WIDTH = 8;
- private static final int FLAGS_WIDTH = 1; // Terminal flag (word end)
- private static final int ADDR_WIDTH = 23; // Offset to children
- private static final int FREQ_WIDTH_BYTES = 1;
- private static final int COUNT_WIDTH_BYTES = 1;
-
- private void addCount(int count) {
- mDict[mDictSize++] = (byte) (0xFF & count);
- }
-
- private void addNode(CharNode node, String word1) {
- if (node.terminal) { // store address of each word1 for bigram dic generation
- mDictionary.put(word1, mDictSize);
- }
-
- int charData = 0xFFFF & node.data;
- if (charData > 254) {
- mDict[mDictSize++] = (byte) 255;
- mDict[mDictSize++] = (byte) ((node.data >> 8) & 0xFF);
- mDict[mDictSize++] = (byte) (node.data & 0xFF);
- } else {
- mDict[mDictSize++] = (byte) (0xFF & node.data);
- }
- if (node.children != null) {
- mDictSize += 3; // Space for children address
- } else {
- mDictSize += 1; // Space for just the terminal/address flags
- }
- if ((0xFFFFFF & node.freq) > 255) {
- node.freq = 255;
- }
- if (node.terminal) {
- byte freq = (byte) (0xFF & node.freq);
- mDict[mDictSize++] = freq;
- // bigram
- if (mBigramDict.mBi.containsKey(word1)) {
- int count = mBigramDict.mBi.get(word1).count;
- mBigramDict.mBigramToFill.add(word1);
- mBigramDict.mBigramToFillAddress.add(mDictSize);
- mDictSize += (4 * count);
- } else {
- mDict[mDictSize++] = (byte) (0x00);
- }
- }
- }
-
- private int mNullChildrenCount = 0;
- private int mNotTerminalCount = 0;
-
- private void updateNodeAddress(int nodeAddress, CharNode node,
- int childrenAddress) {
- if ((mDict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
- nodeAddress += 2;
- }
- childrenAddress = ADDRESS_MASK & childrenAddress;
- if (childrenAddress == 0) {
- mNullChildrenCount++;
- } else {
- childrenAddress |= FLAG_ADDRESS_MASK;
- }
- if (node.terminal) {
- childrenAddress |= FLAG_TERMINAL_MASK;
- } else {
- mNotTerminalCount++;
- }
- mDict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
- if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
- mDict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
- mDict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
- }
- }
-
- private void writeWordsRec(List<CharNode> children, StringBuilder word) {
- if (children == null || children.size() == 0) {
- return;
- }
- final int childCount = children.size();
- addCount(childCount);
- int[] childrenAddresses = new int[childCount];
- for (int j = 0; j < childCount; j++) {
- CharNode child = children.get(j);
- childrenAddresses[j] = mDictSize;
- word.append(child.data);
- addNode(child, word.toString());
- word.setLength(word.length() - 1);
- }
- for (int j = 0; j < childCount; j++) {
- CharNode child = children.get(j);
- int nodeAddress = childrenAddresses[j];
- int cacheDictSize = mDictSize;
- word.append(child.data);
- writeWordsRec(child.children, word);
- word.setLength(word.length() - 1);
- updateNodeAddress(nodeAddress, child, child.children != null ? cacheDictSize : 0);
- }
- }
-
- private void writeToDict(String dictFilename) {
- // 4MB max, 22-bit offsets
- mDict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
- // < 1MB in most cases, as there is a limit in the
- // resource size in apks.
- mDictSize = 0;
-
- mDict[mDictSize++] = (byte) (0xFF & VERSION_NUM); // version info
- mDict[mDictSize++] = (byte) (0xFF & (mBigramDict.mHasBigram ? 1 : 0));
-
- final StringBuilder word = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY);
- writeWordsRec(mRoots, word);
- mDict = mBigramDict.writeBigrams(mDict, mDictionary);
- System.out.println("Dict Size = " + mDictSize);
- if (!sSplitOutput) {
- sOutputFileSize = mDictSize;
- }
- try {
- int currentLoc = 0;
- int i = 0;
- int extension = dictFilename.indexOf(".dict");
- String filename = dictFilename.substring(0, extension);
- while (mDictSize > 0) {
- FileOutputStream fos;
- if (sSplitOutput) {
- fos = new FileOutputStream(filename + i + ".dict");
- } else {
- fos = new FileOutputStream(filename + ".dict");
- }
- if (mDictSize > sOutputFileSize) {
- fos.write(mDict, currentLoc, sOutputFileSize);
- mDictSize -= sOutputFileSize;
- currentLoc += sOutputFileSize;
- } else {
- fos.write(mDict, currentLoc, mDictSize);
- mDictSize = 0;
- }
- fos.close();
- i++;
- }
- } catch (IOException ioe) {
- System.err.println("Error writing dict file:" + ioe);
- }
- }
-
- private void traverseDict(int pos, char[] word, int depth) {
- int count = mDict[pos++] & 0xFF;
- for (int i = 0; i < count; i++) {
- char c = (char) (mDict[pos++] & 0xFF);
- if (c == 0xFF) { // two byte character
- c = (char) (((mDict[pos] & 0xFF) << 8) | (mDict[pos+1] & 0xFF));
- pos += 2;
- }
- word[depth] = c;
- boolean terminal = getFirstBitOfByte(pos, mDict);
- int address = 0;
- if ((mDict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check
- address = get22BitAddress(pos, mDict);
- pos += 3;
- } else {
- pos += 1;
- }
- if (terminal) {
- showWord(word, depth + 1, mDict[pos] & 0xFF);
- pos++;
-
- int bigramExist = (mDict[pos] & mBigramDict.FLAG_BIGRAM_READ);
- if (bigramExist > 0) {
- int nextBigramExist = 1;
- while (nextBigramExist > 0) {
- int bigramAddress = get22BitAddress(pos, mDict);
- pos += 3;
- int frequency = (mBigramDict.FLAG_BIGRAM_FREQ & mDict[pos]);
- mBigramDict.searchForTerminalNode(bigramAddress, frequency, mDict);
- nextBigramExist = (mDict[pos++] & mBigramDict.FLAG_BIGRAM_CONTINUED);
- }
- } else {
- pos++;
- }
- }
- if (address != 0) {
- traverseDict(address, word, depth + 1);
- }
- }
- }
-
- private static void showWord(char[] word, int size, int freq) {
- System.out.print(new String(word, 0, size) + " " + freq + "\n");
- }
-
- /* package */ static int get22BitAddress(int pos, byte[] dict) {
- return ((dict[pos + 0] & 0x3F) << 16)
- | ((dict[pos + 1] & 0xFF) << 8)
- | ((dict[pos + 2] & 0xFF));
- }
-
- /* package */ static boolean getFirstBitOfByte(int pos, byte[] dict) {
- return (dict[pos] & 0x80) > 0;
- }
-
- /* package */ static boolean getSecondBitOfByte(int pos, byte[] dict) {
- return (dict[pos] & 0x40) > 0;
- }
-}
diff --git a/tools/makedict2/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
index 79cf14b2b..79cf14b2b 100644
--- a/tools/makedict2/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
+++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
diff --git a/tools/makedict2/Android.mk b/tools/makedict2/Android.mk
deleted file mode 100644
index e056168e9..000000000
--- a/tools/makedict2/Android.mk
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Copyright (C) 2011 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := $(call all-java-files-under,src)
-LOCAL_SRC_FILES += $(call all-java-files-under,tests)
-LOCAL_JAR_MANIFEST := etc/manifest.txt
-LOCAL_MODULE_TAGS := eng
-LOCAL_MODULE := makedict2
-LOCAL_JAVA_LIBRARIES := junit
-
-include $(BUILD_HOST_JAVA_LIBRARY)
-include $(LOCAL_PATH)/etc/Android.mk
diff --git a/tools/makedict2/etc/Android.mk b/tools/makedict2/etc/Android.mk
deleted file mode 100644
index c71377cdf..000000000
--- a/tools/makedict2/etc/Android.mk
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (C) 2011 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_MODULE_TAGS := eng
-
-LOCAL_PREBUILT_EXECUTABLES := makedict2
-include $(BUILD_HOST_PREBUILT)
diff --git a/tools/makedict2/etc/makedict2 b/tools/makedict2/etc/makedict2
deleted file mode 100755
index 1bad825e1..000000000
--- a/tools/makedict2/etc/makedict2
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/bin/sh
-# Copyright 2011, The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Set up prog to be the path of this script, including following symlinks,
-# and set up progdir to be the fully-qualified pathname of its directory.
-prog="$0"
-while [ -h "${prog}" ]; do
- newProg=`/bin/ls -ld "${prog}"`
- newProg=`expr "${newProg}" : ".* -> \(.*\)$"`
- if expr "x${newProg}" : 'x/' >/dev/null; then
- prog="${newProg}"
- else
- progdir=`dirname "${prog}"`
- prog="${progdir}/${newProg}"
- fi
-done
-oldwd=`pwd`
-progdir=`dirname "${prog}"`
-cd "${progdir}"
-progdir=`pwd`
-prog="${progdir}"/`basename "${prog}"`
-cd "${oldwd}"
-
-jarfile=makedict2.jar
-frameworkdir="$progdir"
-if [ ! -r "$frameworkdir/$jarfile" ]
-then
- frameworkdir=`dirname "$progdir"`/tools/lib
- libdir=`dirname "$progdir"`/tools/lib
-fi
-if [ ! -r "$frameworkdir/$jarfile" ]
-then
- frameworkdir=`dirname "$progdir"`/framework
- libdir=`dirname "$progdir"`/lib
-fi
-if [ ! -r "$frameworkdir/$jarfile" ]
-then
- echo `basename "$prog"`": can't find $jarfile"
- exit 1
-fi
-
-if [ "$OSTYPE" = "cygwin" ] ; then
- jarpath=`cygpath -w "$frameworkdir/$jarfile"`
- progdir=`cygpath -w "$progdir"`
-else
- jarpath="$frameworkdir/$jarfile"
-fi
-
-# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored
-# might need more memory, e.g. -Xmx128M
-exec java -ea -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@"
diff --git a/tools/makedict2/etc/manifest.txt b/tools/makedict2/etc/manifest.txt
deleted file mode 100644
index 948609da6..000000000
--- a/tools/makedict2/etc/manifest.txt
+++ /dev/null
@@ -1 +0,0 @@
-Main-Class: com.android.inputmethod.latin.DictionaryMaker