about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/dicttool/Android.mk 10
-rw-r--r-- tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java 54
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java 272
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java 51
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java 2
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java 2
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java 70
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java 10
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java 2
-rw-r--r-- tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java 109
10 files changed, 384 insertions, 198 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index 81c0706c1..1a9f029ae 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -42,23 +42,15 @@ LATINIME_TESTS_SRC_DIR := $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmetho
# a significant part of the dependencies are mocked in the compat/ directory, with empty or
# nearly-empty implementations, for parts that we don't use in Dicttool.
LATINIME_SRC_FILES_FOR_DICTTOOL := \
- event/Combiner.java \
- event/Event.java \
latin/BinaryDictionary.java \
latin/DicTraverseSession.java \
latin/Dictionary.java \
- latin/LastComposedWord.java \
latin/NgramContext.java \
latin/SuggestedWords.java \
- latin/WordComposer.java \
- latin/settings/NativeSuggestOptions.java \
latin/settings/SettingsValuesForSuggestion.java \
latin/utils/BinaryDictionaryUtils.java \
latin/utils/CombinedFormatUtils.java \
- latin/utils/CoordinateUtils.java \
- latin/utils/FileUtils.java \
- latin/utils/JniUtils.java \
- latin/utils/LocaleUtils.java
+ latin/utils/JniUtils.java
LATINIME_OVERRIDABLE_SRC_FILES_FOR_DICTTOOL := \
latin/define/DebugFlags.java
diff --git a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java b/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java
deleted file mode 100644
index c4457a1b7..000000000
--- a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.event;
-
-import java.util.ArrayList;
-
-/**
- * Compatibility class that stands in for the combiner chain in LatinIME.
- *
- * This is not used by dicttool, it's just needed by the dependency chain.
- */
-// TODO: there should not be a dependency to this in dicttool, so there
-// should be a sensible way to separate them cleanly.
-public class CombinerChain {
- private StringBuilder mComposingWord;
- public CombinerChain(final String initialText, final Combiner... combinerList) {
- mComposingWord = new StringBuilder(initialText);
- }
-
- public Event processEvent(final ArrayList<Event> previousEvents, final Event newEvent) {
- return newEvent;
- }
-
- public void applyProcessedEvent(final Event event) {
- mComposingWord.append(event.getTextToCommit());
- }
-
- public CharSequence getComposingWordWithCombiningFeedback() {
- return mComposingWord;
- }
-
- public void reset() {
- mComposingWord.setLength(0);
- }
-
- public static Combiner[] createCombiners(final String spec) {
- // Dicttool never uses a combiner at all, so we just return a zero-sized array.
- return new Combiner[0];
- }
-}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 1c5dfa9fb..84c3956f7 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@@ -27,12 +31,18 @@ import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
/**
* Class grouping utilities for offline dictionary making.
@@ -44,26 +54,27 @@ public final class BinaryDictOffdeviceUtils {
// Prefix and suffix are arbitrary, the values do not really matter
private final static String PREFIX = "dicttool";
private final static String SUFFIX = ".tmp";
-
private final static int COPY_BUFFER_SIZE = 8192;
- public static class DecoderChainSpec {
+ public static class DecoderChainSpec<T> {
public final static int COMPRESSION = 1;
public final static int ENCRYPTION = 2;
- private final static int MAX_DECODE_DEPTH = 4;
- final int[] mDecoderSpec;
- File mFile;
+ private final static int[][] VALID_DECODER_CHAINS = {
+ { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
+ };
+
+ private final int mDecoderSpecIndex;
+ public T mResult;
public DecoderChainSpec() {
- mDecoderSpec = new int[0];
- mFile = null;
+ mDecoderSpecIndex = 0;
+ mResult = null;
}
- public DecoderChainSpec(final DecoderChainSpec src, final int newStep) {
- mDecoderSpec = Arrays.copyOf(src.mDecoderSpec, src.mDecoderSpec.length + 1);
- mDecoderSpec[src.mDecoderSpec.length] = newStep;
- mFile = src.mFile;
+ private DecoderChainSpec(final DecoderChainSpec<T> src) {
+ mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
+ mResult = src.mResult;
}
private String getStepDescription(final int step) {
@@ -79,110 +90,177 @@ public final class BinaryDictOffdeviceUtils {
public String describeChain() {
final StringBuilder s = new StringBuilder("raw");
- for (final int step : mDecoderSpec) {
+ for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
s.append(" > ");
s.append(getStepDescription(step));
}
return s.toString();
}
- }
- public static void copy(final InputStream input, final OutputStream output) throws IOException {
- final byte[] buffer = new byte[COPY_BUFFER_SIZE];
- for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
- output.write(buffer, 0, readBytes);
+ /**
+ * Returns the next sequential spec. If exhausted, return null.
+ */
+ public DecoderChainSpec next() {
+ if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
+ return null;
+ }
+ return new DecoderChainSpec(this);
+ }
+
+ public InputStream getStream(final File src) throws FileNotFoundException, IOException {
+ InputStream input = new BufferedInputStream(new FileInputStream(src));
+ for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
+ switch (step) {
+ case COMPRESSION:
+ input = Compress.getUncompressedStream(input);
+ break;
+ case ENCRYPTION:
+ input = Crypt.getDecryptedStream(input);
+ break;
+ }
+ }
+ return input;
}
}
- /**
- * Returns a decrypted/uncompressed dictionary.
- *
- * This will decrypt/uncompress any number of times as necessary until it finds the
- * dictionary signature, and copy the decoded file to a temporary place.
- * If this is not a dictionary, the method returns null.
- */
- public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
- return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
+ public interface InputProcessor<T> {
+ @Nonnull
+ public T process(@Nonnull final InputStream input)
+ throws IOException, UnsupportedFormatException;
}
- private static DecoderChainSpec getRawDictionaryOrNullInternal(
- final DecoderChainSpec spec, final File src, final int depth) {
- // Unfortunately the decoding scheme we use can consider any data to be encrypted
- // and will produce some output, meaning it's not possible to reliably detect encrypted
- // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
- // over and over, ending in a stack overflow. Hence we limit the depth at which we try
- // decoding the file.
- if (depth > DecoderChainSpec.MAX_DECODE_DEPTH) {
- return null;
+ public static class CopyProcessor implements InputProcessor<File> {
+ @Override @Nonnull
+ public File process(@Nonnull final InputStream input) throws IOException,
+ UnsupportedFormatException {
+ final File dst = File.createTempFile(PREFIX, SUFFIX);
+ dst.deleteOnExit();
+ try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
+ copy(input, output);
+ output.flush();
+ output.close();
+ if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
+ || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
+ return dst;
+ }
+ }
+ throw new UnsupportedFormatException("Input stream not at the expected format");
}
- if (BinaryDictDecoderUtils.isBinaryDictionary(src)
- || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
- spec.mFile = src;
- return spec;
+ }
+
+ public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+ // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+ // than this. Revisit this if needed later.
+ private final int MAX_HEADER_LENGTH = 32 * 1024;
+ @Override @Nonnull
+ public DictionaryHeader process(final InputStream input) throws IOException,
+ UnsupportedFormatException {
+ // Do everything as curtly and ad-hoc as possible for performance.
+ final byte[] tmpBuffer = new byte[12];
+ if (tmpBuffer.length != input.read(tmpBuffer)) {
+ throw new UnsupportedFormatException("File too short, not a dictionary");
+ }
+ // Ad-hoc check for the magic number. See FormatSpec.java as well as
+ // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
+ final int MAGIC_NUMBER_START_OFFSET = 0;
+ final int VERSION_START_OFFSET = 4;
+ final int HEADER_SIZE_OFFSET = 8;
+ final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
+ + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
+ + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
+ + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
+ if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+ throw new UnsupportedFormatException("Wrong magic number");
+ }
+ final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+ + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+ if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
+ && version != FormatSpec.VERSION202) {
+ throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
+ }
+ final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
+ + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
+ + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
+ + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
+ if (totalHeaderSize > MAX_HEADER_LENGTH) {
+ throw new UnsupportedFormatException("Header too large");
+ }
+ final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+ readStreamExhaustively(input, headerBuffer);
+ final HashMap<String, String> attributes =
+ BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+ return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+ new FormatOptions(version, false /* hasTimestamp */));
}
- // It's not a raw dictionary - try to see if it's compressed.
- final File uncompressedFile = tryGetUncompressedFile(src);
- if (null != uncompressedFile) {
- final DecoderChainSpec newSpec =
- getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
- if (null == newSpec) return null;
- return new DecoderChainSpec(newSpec, DecoderChainSpec.COMPRESSION);
+ }
+
+ private static void readStreamExhaustively(final InputStream inputStream,
+ final byte[] outBuffer) throws IOException, UnsupportedFormatException {
+ int readBytes = 0;
+ int readBytesLastCycle = -1;
+ while (readBytes != outBuffer.length) {
+ readBytesLastCycle = inputStream.read(outBuffer, readBytes,
+ outBuffer.length - readBytes);
+ if (readBytesLastCycle == -1)
+ throw new UnsupportedFormatException("File shorter than specified in the header"
+ + " (expected " + outBuffer.length + ", read " + readBytes + ")");
+ readBytes += readBytesLastCycle;
}
- // It's not a compressed either - try to see if it's crypted.
- final File decryptedFile = tryGetDecryptedFile(src);
- if (null != decryptedFile) {
- final DecoderChainSpec newSpec =
- getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
- if (null == newSpec) return null;
- return new DecoderChainSpec(newSpec, DecoderChainSpec.ENCRYPTION);
+ }
+
+ public static void copy(final InputStream input, final OutputStream output) throws IOException {
+ final byte[] buffer = new byte[COPY_BUFFER_SIZE];
+ for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
+ output.write(buffer, 0, readBytes);
}
- return null;
}
- /* Try to uncompress the file passed as an argument.
+ /**
+ * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
*
- * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
- * is returned.
+ * This will execute the given processor repeatedly with the possible alternatives
+ * for dictionary format until the processor does not throw an exception.
+ * If the processor succeeds for none of the possible formats, the method returns null.
*/
- private static File tryGetUncompressedFile(final File src) {
- try {
- final File dst = File.createTempFile(PREFIX, SUFFIX);
- dst.deleteOnExit();
- try (
- final InputStream input = Compress.getUncompressedStream(
- new BufferedInputStream(new FileInputStream(src)));
- final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
- ) {
- copy(input, output);
- return dst;
+ @Nullable
+ public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
+ @Nonnull final InputProcessor<T> processor) {
+ @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
+ while (null != spec) {
+ try {
+ final InputStream input = spec.getStream(src);
+ spec.mResult = processor.process(input);
+ try {
+ input.close();
+ } catch (IOException e) {
+ // CipherInputStream doesn't like being closed without having read the
+ // entire stream, for some reason. But we don't want to because it's a waste
+ // of resources. We really, really don't care about this.
+ // However on close() CipherInputStream does throw this exception, wrapped
+ // in an IOException so we need to catch it.
+ if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
+ throw e;
+ }
+ }
+ return spec;
+ } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
+ // If the format is not the right one for this file, the processor will throw one
+ // of these exceptions. In our case, that means we should try the next spec,
+ // since it may still be at another format we haven't tried yet.
+ // TODO: stop using exceptions for this non-exceptional case.
}
- } catch (final IOException e) {
- // Could not uncompress the file: presumably the file is simply not a compressed file
- return null;
+ spec = spec.next();
}
+ return null;
}
- /* Try to decrypt the file passed as an argument.
- *
- * If the file can be decrypted, the decrypted version is returned. Otherwise, null
- * is returned.
+ /**
+ * Get a decoder chain spec with a raw dictionary file. This makes a new file on the
+ * disk ready for any treatment the client wants.
*/
- private static File tryGetDecryptedFile(final File src) {
- try {
- final File dst = File.createTempFile(PREFIX, SUFFIX);
- dst.deleteOnExit();
- try (
- final InputStream input = Crypt.getDecryptedStream(
- new BufferedInputStream(new FileInputStream(src)));
- final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
- ) {
- copy(input, output);
- return dst;
- }
- } catch (final IOException e) {
- // Could not decrypt the file: presumably the file is simply not a crypted file
- return null;
- }
+ @Nullable
+ public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
+ return decodeDictionaryForProcess(src, new CopyProcessor());
}
static FusionDictionary getDictionary(final String filename, final boolean report) {
@@ -192,28 +270,28 @@ public final class BinaryDictOffdeviceUtils {
System.out.println("Size : " + file.length() + " bytes");
}
try {
- final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
+ final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
if (null == decodedSpec) {
throw new RuntimeException("Does not seem to be a dictionary file " + filename);
}
- if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
+ if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
if (report) {
System.out.println("Format : Combined format");
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+ System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
}
try (final BufferedReader reader = new BufferedReader(
- new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
+ new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
return CombinedInputOutput.readDictionaryCombined(reader);
}
}
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
- decodedSpec.mFile, 0, decodedSpec.mFile.length(),
+ decodedSpec.mResult, 0, decodedSpec.mResult.length(),
DictDecoder.USE_BYTEARRAY);
if (report) {
System.out.println("Format : Binary dictionary format");
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+ System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
}
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
} catch (final IOException | UnsupportedFormatException e) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 48d2e5922..955c5728c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -98,6 +98,7 @@ public class CombinedInputOutput {
String word = null;
ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
boolean isNotAWord = false;
+ boolean isPossiblyOffensive = false;
ArrayList<WeightedString> bigrams = new ArrayList<>();
ArrayList<WeightedString> shortcuts = new ArrayList<>();
while (null != (line = reader.readLine())) {
@@ -106,7 +107,7 @@ public class CombinedInputOutput {
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
- isNotAWord, false /* isPossiblyOffensive */);
+ isNotAWord, isPossiblyOffensive);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
@@ -114,27 +115,37 @@ public class CombinedInputOutput {
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
isNotAWord = false;
+ isPossiblyOffensive = false;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
- word = params[1];
- } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
- probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
- probabilityInfo.mTimestamp, probabilityInfo.mLevel,
- probabilityInfo.mCount);
- } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
- final String[] historicalInfoParams =
- params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
- if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
- throw new RuntimeException("Wrong format (historical info) : " + line);
- }
- probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
- Integer.parseInt(historicalInfoParams[0]),
- Integer.parseInt(historicalInfoParams[1]),
- Integer.parseInt(historicalInfoParams[2]));
- } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
- isNotAWord = "true".equals(params[1]);
+ switch (params[0]) {
+ case CombinedFormatUtils.WORD_TAG:
+ word = params[1];
+ break;
+ case CombinedFormatUtils.PROBABILITY_TAG:
+ probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+ probabilityInfo.mCount);
+ break;
+ case CombinedFormatUtils.HISTORICAL_INFO_TAG:
+ final String[] historicalInfoParams = params[1].split(
+ CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : "
+ + line);
+ }
+ probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
+ break;
+ case CombinedFormatUtils.NOT_A_WORD_TAG:
+ isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]);
+ break;
+ case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG:
+ isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]);
+ break;
}
}
} else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
@@ -190,7 +201,7 @@ public class CombinedInputOutput {
}
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
- false /* isPossiblyOffensive */);
+ isPossiblyOffensive);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
index 0d93c7fa9..8fdf7633f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
@@ -18,7 +18,9 @@ package com.android.inputmethod.latin.dicttool;
public class CommandList {
public static void populate() {
+ // TODO: Move some commands to native code.
Dicttool.addCommand("info", Info.class);
+ Dicttool.addCommand("header", Header.class);
Dicttool.addCommand("diff", Diff.class);
Dicttool.addCommand("compress", Compress.Compressor.class);
Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 8f9e4a3a6..6187853c8 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -120,7 +120,7 @@ public class DictionaryMaker {
String inputCombined = null;
String outputBinary = null;
String outputCombined = null;
- int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
+ int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202.
// Don't use code point table by default.
int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
new file mode 100644
index 000000000..ba96c0aeb
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Locale;
+
+public class Header extends Dicttool.Command {
+ public static final String COMMAND = "header";
+
+ public Header() {
+ }
+
+ @Override
+ public String getHelp() {
+ return COMMAND + " <filename>: prints the header contents of a dictionary file";
+ }
+
+ @Override
+ public void run() throws UnsupportedFormatException {
+ final boolean plumbing;
+ if (mArgs.length > 0 && "-p".equals(mArgs[0])) {
+ plumbing = true;
+ mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
+ } else {
+ plumbing = false;
+ }
+ if (mArgs.length < 1) {
+ throw new RuntimeException("Not enough arguments for command " + COMMAND);
+ }
+ final String filename = mArgs[0];
+ final File dictFile = new File(filename);
+ final DecoderChainSpec<DictionaryHeader> spec =
+ BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
+ new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+ if (null == spec) {
+ throw new UnsupportedFormatException(filename
+ + " doesn't seem to be a valid version 2 dictionary file");
+ }
+
+ final DictionaryHeader header = spec.mResult;
+ System.out.println("Dictionary : " + dictFile.getAbsolutePath());
+ System.out.println("Size : " + dictFile.length() + " bytes");
+ System.out.println("Format : Binary dictionary format");
+ System.out.println("Format version : " + header.mFormatOptions.mVersion);
+ System.out.println("Packaging : " + spec.describeChain());
+ System.out.println("Header attributes :");
+ System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
+ }
+}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index 47ea70629..3efa10a80 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
@@ -77,16 +79,16 @@ public class Package {
if (mArgs.length != 2) {
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
}
- final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
- BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
+ final BinaryDictOffdeviceUtils.DecoderChainSpec<DictionaryHeader> decodedSpec =
+ BinaryDictOffdeviceUtils.decodeDictionaryForProcess(new File(mArgs[0]),
+ new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
if (null == decodedSpec) {
System.out.println(mArgs[0] + " does not seem to be a dictionary");
return;
}
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
try (
- final InputStream input = getFileInputStream(decodedSpec.mFile);
+ final InputStream input = decodedSpec.getStream(new File(mArgs[0]));
final OutputStream output = new BufferedOutputStream(
getFileOutputStreamOrStdOut(mArgs[1]))
) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index b6383d788..e2dd5199b 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -16,10 +16,10 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.common.FileUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
-import com.android.inputmethod.latin.utils.FileUtils;
import java.io.File;
import java.io.IOException;
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 6cdbff7e5..ea9d4cc19 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -16,10 +16,17 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.common.CodePointUtils;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
+import com.android.inputmethod.latin.dicttool.Compress;
+import com.android.inputmethod.latin.dicttool.Crypt;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
@@ -35,13 +42,37 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
/**
* Unit tests for BinaryDictOffdeviceUtils
*/
public class BinaryDictOffdeviceUtilsTests extends TestCase {
private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+ // Size handed to CodePointUtils.generateCodePointSet() when building the random test words.
+ private static final int CODE_POINT_SET_SIZE = 300;
+ // Random source seeded in the constructor; also drives the randomized header options.
+ final Random mRandom;
+ // Generated test words. Static, so the constructor clears and refills it on each instantiation.
+ private static final ArrayList<String> sWords = new ArrayList<>();
+
+ public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) {
+ super();
+ mRandom = new Random(seed);
+ sWords.clear();
+ generateWords(maxUnigrams, mRandom);
+ }
+
+    /**
+     * Appends {@code maxUnigrams} distinct, randomly generated words to the shared word list.
+     *
+     * @param maxUnigrams number of distinct words to generate
+     * @param random source of randomness for both the code point set and the words
+     */
+    private static void generateWords(final int maxUnigrams, final Random random) {
+        final int[] alphabet = CodePointUtils.generateCodePointSet(CODE_POINT_SET_SIZE, random);
+        final Set<String> uniqueWords = new HashSet<>();
+        int remaining = maxUnigrams;
+        while (remaining > 0) {
+            // Only words that were not generated before count towards the target.
+            if (uniqueWords.add(CodePointUtils.generateWord(random, alphabet))) {
+                --remaining;
+            }
+        }
+        sWords.addAll(uniqueWords);
+    }
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
final String VERSION = "1";
@@ -68,23 +99,17 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
final File dst = File.createTempFile("testGetRawDict", ".tmp");
dst.deleteOnExit();
try (final OutputStream out = Compress.getCompressedStream(
- Compress.getCompressedStream(
- Compress.getCompressedStream(
- new BufferedOutputStream(new FileOutputStream(dst)))))) {
+ new BufferedOutputStream(new FileOutputStream(dst)))) {
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
- dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
+ dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false));
}
// Test for an actually compressed dictionary and its contents
- final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
+ final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec =
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
- for (final int step : decodeSpec.mDecoderSpec) {
- assertEquals("Wrong decode spec",
- BinaryDictOffdeviceUtils.DecoderChainSpec.COMPRESSION, step);
- }
- assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.length);
- final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
- decodeSpec.mFile.length());
+ assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
+ final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0,
+ decodeSpec.mResult.length());
final FusionDictionary resultDict =
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(
@@ -125,4 +150,64 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
assertNull("Wrongly identified data file",
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
}
+
+    /**
+     * Writes a dictionary with randomized header attributes to a temporary file, optionally
+     * compresses and/or encrypts it, then checks that
+     * {@code BinaryDictOffdeviceUtils.decodeDictionaryForProcess} with a
+     * {@code HeaderReaderProcessor} reports the expected decoder chain and hands back the
+     * attributes that were written.
+     *
+     * @param compress whether to run the written file through {@code Compress.Compressor}
+     * @param crypt whether to run the (possibly compressed) file through {@code Crypt.Encrypter}
+     * @throws IOException if a temporary file cannot be created or an I/O step fails
+     * @throws UnsupportedFormatException presumably raised by the dictionary encoder or decoder
+     */
+    public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
+            throws IOException, UnsupportedFormatException {
+        final String dictName = "testHeaderReaderProcessor";
+        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
+        final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
+        final HashMap<String, String> options = new HashMap<>();
+        // Required attributes
+        options.put("dictionary", "main:en_US");
+        options.put("locale", "en_US");
+        options.put("version", Integer.toString(mRandom.nextInt()));
+        // Add some random options for test. nextInt(bound) is never negative (nextInt() % bound
+        // can be), and each option consumes two words, so cap the count by the words available.
+        final int numberOfOptionsToAdd = Math.min(
+                mRandom.nextInt(MAX_NUMBER_OF_OPTIONS_TO_ADD + 1), sWords.size() / 2);
+        for (int i = 0; i < numberOfOptionsToAdd; ++i) {
+            // Even-indexed words are keys, the following odd-indexed word is the value.
+            options.put(sWords.get(2 * i), sWords.get(2 * i + 1));
+        }
+        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+                new DictionaryOptions(options));
+
+        for (final String word : sWords) {
+            dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */,
+                    false /* isNotAWord */, false /* isPossiblyOffensive */);
+        }
+
+        File file = File.createTempFile(dictName, ".tmp");
+        file.deleteOnExit();
+        final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
+        dictEncoder.writeDictionary(dict, formatOptions);
+
+        if (compress) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".compress", ".tmp");
+            file.deleteOnExit();
+            final Compress.Compressor compressCommand = new Compress.Compressor();
+            compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            compressCommand.run();
+        }
+        if (crypt) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".crypt", ".tmp");
+            file.deleteOnExit();
+            final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
+            cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            cryptCommand.run();
+        }
+
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
+                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+        assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
+        final DictionaryHeader header = spec.mResult;
+        // Encryption is applied last when writing, so it is the first layer listed after "raw".
+        assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
+                spec.describeChain());
+        // JUnit convention: expected value first, actual value second.
+        assertEquals(options, header.mDictionaryOptions.mAttributes);
+    }
+
+ public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException {
+ runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
+ runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
+ runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
+ }
}