diff options
Diffstat (limited to 'tools')
40 files changed, 759 insertions, 844 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index 3e3d419e6..1a9f029ae 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -35,7 +35,6 @@ LATINIME_LOCAL_DIR := ../.. LATINIME_BASE_SRC_DIR := $(LATINIME_LOCAL_DIR)/java/src/com/android/inputmethod LATINIME_BASE_OVERRIDABLE_SRC_DIR := \ $(LATINIME_LOCAL_DIR)/java-overridable/src/com/android/inputmethod -LATINIME_ANNOTATIONS_SRC_DIR := $(LATINIME_BASE_SRC_DIR)/annotations MAKEDICT_CORE_SRC_DIR := $(LATINIME_BASE_SRC_DIR)/latin/makedict LATINIME_TESTS_SRC_DIR := $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin @@ -43,26 +42,15 @@ LATINIME_TESTS_SRC_DIR := $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmetho # a significant part of the dependencies are mocked in the compat/ directory, with empty or # nearly-empty implementations, for parts that we don't use in Dicttool. LATINIME_SRC_FILES_FOR_DICTTOOL := \ - event/Combiner.java \ - event/Event.java \ latin/BinaryDictionary.java \ latin/DicTraverseSession.java \ latin/Dictionary.java \ - latin/InputPointers.java \ - latin/LastComposedWord.java \ - latin/PrevWordsInfo.java \ + latin/NgramContext.java \ latin/SuggestedWords.java \ - latin/WordComposer.java \ - latin/settings/NativeSuggestOptions.java \ latin/settings/SettingsValuesForSuggestion.java \ latin/utils/BinaryDictionaryUtils.java \ latin/utils/CombinedFormatUtils.java \ - latin/utils/CoordinateUtils.java \ - latin/utils/FileUtils.java \ - latin/utils/JniUtils.java \ - latin/utils/LocaleUtils.java \ - latin/utils/ResizableIntArray.java \ - latin/utils/StringUtils.java + latin/utils/JniUtils.java LATINIME_OVERRIDABLE_SRC_FILES_FOR_DICTTOOL := \ latin/define/DebugFlags.java @@ -82,18 +70,15 @@ DICTTOOL_COMPAT_TESTS_DIR := compat LOCAL_MAIN_SRC_FILES := $(call all-java-files-under, $(MAKEDICT_CORE_SRC_DIR)) LOCAL_TOOL_SRC_FILES := $(call all-java-files-under, src) -LOCAL_ANNOTATIONS_SRC_FILES := $(call all-java-files-under, $(LATINIME_ANNOTATIONS_SRC_DIR)) LOCAL_SRC_FILES := $(LOCAL_TOOL_SRC_FILES) \ $(filter-out $(addprefix %/, $(notdir $(LOCAL_TOOL_SRC_FILES))), $(LOCAL_MAIN_SRC_FILES)) \ - $(call all-java-files-under, $(DICTTOOL_COMPAT_TESTS_DIR)) \ - $(LOCAL_ANNOTATIONS_SRC_FILES) $(USED_TARGETED_SRC_FILES) \ - $(LATINIME_BASE_SRC_DIR)/latin/Constants.java \ - $(call all-java-files-under, tests) \ - $(call all-java-files-under, $(DICTTOOL_ONDEVICE_TESTS_DIR)) + $(USED_TARGETED_SRC_FILES) \ + $(call all-java-files-under, \ + tests $(DICTTOOL_COMPAT_TESTS_DIR) $(DICTTOOL_ONDEVICE_TESTS_DIR)) LOCAL_JAVA_LIBRARIES := junit -LOCAL_STATIC_JAVA_LIBRARIES := jsr305lib +LOCAL_STATIC_JAVA_LIBRARIES := jsr305lib latinime-common-host LOCAL_REQUIRED_MODULES := $(LATINIME_HOST_NATIVE_LIBNAME) LOCAL_JAR_MANIFEST := etc/manifest.txt LOCAL_MODULE := dicttool_aosp diff --git a/tools/dicttool/compat/android/test/MoreAsserts.java b/tools/dicttool/compat/android/test/MoreAsserts.java deleted file mode 100644 index f56420b9c..000000000 --- a/tools/dicttool/compat/android/test/MoreAsserts.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package android.test; - -import junit.framework.Assert; - -/** - * This is a compatibility class that aims at emulating android.test.MoreAsserts from the - * Android library as simply as possible, and only to the extent that is used by the client classes. - * Its purpose is to provide compatibility without having to pull the whole Android library. - */ -public class MoreAsserts { - public static void assertNotEqual(Object unexpected, Object actual) { - if (equal(unexpected, actual)) { - Assert.fail("expected not to be:<" + unexpected + ">"); - } - } - private static boolean equal(Object a, Object b) { - return a == b || (a != null && a.equals(b)); - } -} diff --git a/tools/dicttool/compat/android/text/TextUtils.java b/tools/dicttool/compat/android/text/TextUtils.java index 5a94b7d4c..82483319e 100644 --- a/tools/dicttool/compat/android/text/TextUtils.java +++ b/tools/dicttool/compat/android/text/TextUtils.java @@ -25,10 +25,7 @@ public class TextUtils { * @return true if str is null or zero length */ public static boolean isEmpty(CharSequence str) { - if (str == null || str.length() == 0) - return true; - else - return false; + return (str == null || str.length() == 0); } /** @@ -45,12 +42,11 @@ public class TextUtils { if (a != null && b != null && (length = a.length()) == b.length()) { if (a instanceof String && b instanceof String) { return a.equals(b); - } else { - for (int i = 0; i < length; i++) { - if (a.charAt(i) != b.charAt(i)) return false; - } - return true; } + for (int i = 0; i < length; i++) { + if (a.charAt(i) != b.charAt(i)) return false; + } + return true; } return false; } @@ -90,7 +86,7 @@ public class TextUtils { * @param tokens an array objects to be joined. Strings will be formed from * the objects by calling object.toString(). */ - public static String join(CharSequence delimiter, Iterable tokens) { + public static String join(CharSequence delimiter, Iterable<?> tokens) { StringBuilder sb = new StringBuilder(); boolean firstTime = true; for (Object token: tokens) { diff --git a/tools/dicttool/compat/android/util/Pair.java b/tools/dicttool/compat/android/util/Pair.java index 5bf34848d..e61e896b7 100644 --- a/tools/dicttool/compat/android/util/Pair.java +++ b/tools/dicttool/compat/android/util/Pair.java @@ -16,7 +16,7 @@ package android.util; -import java.util.Arrays; +import java.util.Objects; public class Pair<T1, T2> { public final T1 mFirst; @@ -29,7 +29,8 @@ public class Pair<T1, T2> { @Override public int hashCode() { - return Arrays.hashCode(new Object[] { mFirst, mSecond }); + return (mFirst == null ? 0 : mFirst.hashCode()) + ^ (mSecond == null ? 0 : mSecond.hashCode()); } @Override @@ -37,7 +38,6 @@ public class Pair<T1, T2> { if (o == this) return true; if (!(o instanceof Pair)) return false; Pair<?, ?> p = (Pair<?, ?>)o; - return ((mFirst == null && p.mFirst == null) || mFirst.equals(p.mFirst)) - && ((mSecond == null && p.mSecond == null) || mSecond.equals(p.mSecond)); + return Objects.equals(mFirst, p.mFirst) && Objects.equals(mSecond, p.mSecond); } } diff --git a/tools/dicttool/compat/android/view/inputmethod/CompletionInfo.java b/tools/dicttool/compat/android/view/inputmethod/CompletionInfo.java index fbce72556..e2f769ec8 100644 --- a/tools/dicttool/compat/android/view/inputmethod/CompletionInfo.java +++ b/tools/dicttool/compat/android/view/inputmethod/CompletionInfo.java @@ -16,6 +16,7 @@ package android.view.inputmethod; +@SuppressWarnings("static-method") public class CompletionInfo { public final String getText() { return ""; } } diff --git a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java b/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java deleted file mode 100644 index c4457a1b7..000000000 --- a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.event; - -import java.util.ArrayList; - -/** - * Compatibility class that stands in for the combiner chain in LatinIME. - * - * This is not used by dicttool, it's just needed by the dependency chain. - */ -// TODO: there should not be a dependency to this in dicttool, so there -// should be a sensible way to separate them cleanly. -public class CombinerChain { - private StringBuilder mComposingWord; - public CombinerChain(final String initialText, final Combiner... combinerList) { - mComposingWord = new StringBuilder(initialText); - } - - public Event processEvent(final ArrayList<Event> previousEvents, final Event newEvent) { - return newEvent; - } - - public void applyProcessedEvent(final Event event) { - mComposingWord.append(event.getTextToCommit()); - } - - public CharSequence getComposingWordWithCombiningFeedback() { - return mComposingWord; - } - - public void reset() { - mComposingWord.setLength(0); - } - - public static Combiner[] createCombiners(final String spec) { - // Dicttool never uses a combiner at all, so we just return a zero-sized array. - return new Combiner[0]; - } -} diff --git a/tools/dicttool/compat/com/android/inputmethod/keyboard/Key.java b/tools/dicttool/compat/com/android/inputmethod/keyboard/Key.java index 1e63bb526..925940650 100644 --- a/tools/dicttool/compat/com/android/inputmethod/keyboard/Key.java +++ b/tools/dicttool/compat/com/android/inputmethod/keyboard/Key.java @@ -16,6 +16,7 @@ package com.android.inputmethod.keyboard; +@SuppressWarnings("static-method") public class Key { public final int getX() { return 0; } public final int getY() { return 0; } diff --git a/tools/dicttool/compat/com/android/inputmethod/keyboard/Keyboard.java b/tools/dicttool/compat/com/android/inputmethod/keyboard/Keyboard.java index 61b209f4d..3d6bfd0a9 100644 --- a/tools/dicttool/compat/com/android/inputmethod/keyboard/Keyboard.java +++ b/tools/dicttool/compat/com/android/inputmethod/keyboard/Keyboard.java @@ -16,6 +16,7 @@ package com.android.inputmethod.keyboard; +@SuppressWarnings("unused") public class Keyboard { private final Key KEY = new Key(); public final Key getKey(final int i) { return KEY; } diff --git a/tools/dicttool/compat/com/android/inputmethod/keyboard/ProximityInfo.java b/tools/dicttool/compat/com/android/inputmethod/keyboard/ProximityInfo.java index 561b6637c..3a068bd5a 100644 --- a/tools/dicttool/compat/com/android/inputmethod/keyboard/ProximityInfo.java +++ b/tools/dicttool/compat/com/android/inputmethod/keyboard/ProximityInfo.java @@ -18,11 +18,10 @@ package com.android.inputmethod.keyboard; public class ProximityInfo { public long getNativeProximityInfo() { return 0l; } - private static native long setProximityInfoNative(String locale, - int displayWidth, int displayHeight, int gridWidth, int gridHeight, - int mostCommonKeyWidth, int mostCommonKeyHeight, int[] proximityCharsArray, - int keyCount, int[] keyXCoordinates, int[] keyYCoordinates, int[] keyWidths, - int[] keyHeights, int[] keyCharCodes, float[] sweetSpotCenterXs, + private static native long setProximityInfoNative(int displayWidth, int displayHeight, + int gridWidth, int gridHeight, int mostCommonKeyWidth, int mostCommonKeyHeight, + int[] proximityCharsArray, int keyCount, int[] keyXCoordinates, int[] keyYCoordinates, + int[] keyWidths, int[] keyHeights, int[] keyCharCodes, float[] sweetSpotCenterXs, float[] sweetSpotCenterYs, float[] sweetSpotRadii); private static native void releaseProximityInfoNative(long nativeProximityInfo); } diff --git a/tools/dicttool/compat/com/android/inputmethod/latin/utils/LanguageModelParam.java b/tools/dicttool/compat/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java index f4ca94a81..b5a729421 100644 --- a/tools/dicttool/compat/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/tools/dicttool/compat/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java @@ -16,5 +16,5 @@ package com.android.inputmethod.latin.utils; -public final class LanguageModelParam { +public final class WordInputEventForPersonalization { } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 3ef03f4bd..b5ed94ccd 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -19,24 +19,28 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.DictDecoder; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import org.xml.sax.SAXException; - import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; -import java.util.ArrayList; +import java.util.HashMap; -import javax.xml.parsers.ParserConfigurationException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; /** * Class grouping utilities for offline dictionary making. @@ -48,126 +52,213 @@ public final class BinaryDictOffdeviceUtils { // Prefix and suffix are arbitrary, the values do not really matter private final static String PREFIX = "dicttool"; private final static String SUFFIX = ".tmp"; + private final static int COPY_BUFFER_SIZE = 8192; - public final static String COMPRESSION = "compressed"; - public final static String ENCRYPTION = "encrypted"; + public static class DecoderChainSpec<T> { + public final static int COMPRESSION = 1; + public final static int ENCRYPTION = 2; - private final static int MAX_DECODE_DEPTH = 8; - private final static int COPY_BUFFER_SIZE = 8192; + private final static int[][] VALID_DECODER_CHAINS = { + { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION } + }; + + private final int mDecoderSpecIndex; + public T mResult; - public static class DecoderChainSpec { - ArrayList<String> mDecoderSpec = new ArrayList<>(); - File mFile; + public DecoderChainSpec() { + mDecoderSpecIndex = 0; + mResult = null; + } + + private DecoderChainSpec(final DecoderChainSpec<T> src) { + mDecoderSpecIndex = src.mDecoderSpecIndex + 1; + mResult = src.mResult; + } - public DecoderChainSpec addStep(final String stepDescription) { - mDecoderSpec.add(stepDescription); - return this; + private String getStepDescription(final int step) { + switch (step) { + case COMPRESSION: + return "compression"; + case ENCRYPTION: + return "encryption"; + default: + return "unknown"; + } } public String describeChain() { final StringBuilder s = new StringBuilder("raw"); - for (final String step : mDecoderSpec) { + for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) { s.append(" > "); - s.append(step); + s.append(getStepDescription(step)); } return s.toString(); } - } - public static void copy(final InputStream input, final OutputStream output) throws IOException { - final byte[] buffer = new byte[COPY_BUFFER_SIZE]; - for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { - output.write(buffer, 0, readBytes); + /** + * Returns the next sequential spec. If exhausted, return null. + */ + public DecoderChainSpec next() { + if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) { + return null; + } + return new DecoderChainSpec(this); } - } - /** - * Returns a decrypted/uncompressed dictionary. - * - * This will decrypt/uncompress any number of times as necessary until it finds the - * dictionary signature, and copy the decoded file to a temporary place. - * If this is not a dictionary, the method returns null. - */ - public static DecoderChainSpec getRawDictionaryOrNull(final File src) { - return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); + public InputStream getStream(final File src) throws FileNotFoundException, IOException { + InputStream input = new BufferedInputStream(new FileInputStream(src)); + for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) { + switch (step) { + case COMPRESSION: + input = Compress.getUncompressedStream(input); + break; + case ENCRYPTION: + input = Crypt.getDecryptedStream(input); + break; + } + } + return input; + } } - private static DecoderChainSpec getRawDictionaryOrNullInternal( - final DecoderChainSpec spec, final File src, final int depth) { - // Unfortunately the decoding scheme we use can consider any data to be encrypted - // and will product some output, meaning it's not possible to reliably detect encrypted - // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt - // over and over, ending in a stack overflow. Hence we limit the depth at which we try - // decoding the file. - if (depth > MAX_DECODE_DEPTH) return null; - if (BinaryDictDecoderUtils.isBinaryDictionary(src) - || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { - spec.mFile = src; - return spec; - } - // It's not a raw dictionary - try to see if it's compressed. - final File uncompressedFile = tryGetUncompressedFile(src); - if (null != uncompressedFile) { - final DecoderChainSpec newSpec = - getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1); - if (null == newSpec) return null; - return newSpec.addStep(COMPRESSION); - } - // It's not a compressed either - try to see if it's crypted. - final File decryptedFile = tryGetDecryptedFile(src); - if (null != decryptedFile) { - final DecoderChainSpec newSpec = - getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1); - if (null == newSpec) return null; - return newSpec.addStep(ENCRYPTION); - } - return null; + public interface InputProcessor<T> { + @Nonnull + public T process(@Nonnull final InputStream input) + throws IOException, UnsupportedFormatException; } - /* Try to uncompress the file passed as an argument. - * - * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null - * is returned. - */ - private static File tryGetUncompressedFile(final File src) { - try { + public static class CopyProcessor implements InputProcessor<File> { + @Override @Nonnull + public File process(@Nonnull final InputStream input) throws IOException, + UnsupportedFormatException { final File dst = File.createTempFile(PREFIX, SUFFIX); dst.deleteOnExit(); - try ( - final InputStream input = Compress.getUncompressedStream( - new BufferedInputStream(new FileInputStream(src))); - final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) - ) { + try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) { copy(input, output); - return dst; + output.flush(); + output.close(); + if (BinaryDictDecoderUtils.isBinaryDictionary(dst) + || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) { + return dst; + } + } + throw new UnsupportedFormatException("Input stream not at the expected format"); + } + } + + public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> { + // Arbitrarily limit the header length to 32k. Sounds like it would never be larger + // than this. Revisit this if needed later. + private final int MAX_HEADER_LENGTH = 32 * 1024; + @Override @Nonnull + public DictionaryHeader process(final InputStream input) throws IOException, + UnsupportedFormatException { + // Do everything as curtly and ad-hoc as possible for performance. + final byte[] tmpBuffer = new byte[12]; + if (tmpBuffer.length != input.read(tmpBuffer)) { + throw new UnsupportedFormatException("File too short, not a dictionary"); + } + // Ad-hoc check for the magic number. See FormatSpec.java as well as + // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader(). + final int MAGIC_NUMBER_START_OFFSET = 0; + final int VERSION_START_OFFSET = 4; + final int HEADER_SIZE_OFFSET = 8; + final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24) + + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16) + + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8) + + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF); + if (magicNumber != FormatSpec.MAGIC_NUMBER) { + throw new UnsupportedFormatException("Wrong magic number"); } - } catch (final IOException e) { - // Could not uncompress the file: presumably the file is simply not a compressed file - return null; + final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8) + + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF); + if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201 + && version != FormatSpec.VERSION202) { + throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported"); + } + final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24) + + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16) + + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8) + + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF); + if (totalHeaderSize > MAX_HEADER_LENGTH) { + throw new UnsupportedFormatException("Header too large"); + } + final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length]; + readStreamExhaustively(input, headerBuffer); + final HashMap<String, String> attributes = + BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer); + return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes), + new FormatOptions(version, false /* hasTimestamp */)); + } + } + + private static void readStreamExhaustively(final InputStream inputStream, + final byte[] outBuffer) throws IOException, UnsupportedFormatException { + int readBytes = 0; + int readBytesLastCycle = -1; + while (readBytes != outBuffer.length) { + readBytesLastCycle = inputStream.read(outBuffer, readBytes, + outBuffer.length - readBytes); + if (readBytesLastCycle == -1) + throw new UnsupportedFormatException("File shorter than specified in the header" + + " (expected " + outBuffer.length + ", read " + readBytes + ")"); + readBytes += readBytesLastCycle; + } + } + + public static void copy(final InputStream input, final OutputStream output) throws IOException { + final byte[] buffer = new byte[COPY_BUFFER_SIZE]; + for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { + output.write(buffer, 0, readBytes); } } - /* Try to decrypt the file passed as an argument. + /** + * Process a dictionary, decrypting/uncompressing it on the fly as necessary. * - * If the file can be decrypted, the decrypted version is returned. Otherwise, null - * is returned. + * This will execute the given processor repeatedly with the possible alternatives + * for dictionary format until the processor does not throw an exception. + * If the processor succeeds for none of the possible formats, the method returns null. */ - private static File tryGetDecryptedFile(final File src) { - try { - final File dst = File.createTempFile(PREFIX, SUFFIX); - dst.deleteOnExit(); - try ( - final InputStream input = Crypt.getDecryptedStream( - new BufferedInputStream(new FileInputStream(src))); - final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) - ) { - copy(input, output); - return dst; + @Nullable + public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src, + @Nonnull final InputProcessor<T> processor) { + @Nonnull DecoderChainSpec spec = new DecoderChainSpec(); + while (null != spec) { + try { + final InputStream input = spec.getStream(src); + spec.mResult = processor.process(input); + try { + input.close(); + } catch (IOException e) { + // CipherInputStream doesn't like being closed without having read the + // entire stream, for some reason. But we don't want to because it's a waste + // of resources. We really, really don't care about this. + // However on close() CipherInputStream does throw this exception, wrapped + // in an IOException so we need to catch it. + if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) { + throw e; + } + } + return spec; + } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) { + // If the format is not the right one for this file, the processor will throw one + // of these exceptions. In our case, that means we should try the next spec, + // since it may still be at another format we haven't tried yet. + // TODO: stop using exceptions for this non-exceptional case. } - } catch (final IOException e) { - // Could not decrypt the file: presumably the file is simply not a crypted file - return null; + spec = spec.next(); } + return null; + } + + /** + * Get a decoder chain spec with a raw dictionary file. This makes a new file on the + * disk ready for any treatment the client wants. + */ + @Nullable + public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) { + return decodeDictionaryForProcess(src, new CopyProcessor()); } static FusionDictionary getDictionary(final String filename, final boolean report) { @@ -177,40 +268,31 @@ public final class BinaryDictOffdeviceUtils { System.out.println("Size : " + file.length() + " bytes"); } try { - if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { - if (report) { - System.out.println("Format : XML unigram list"); - } - return XmlDictInputOutput.readDictionaryXml( - new BufferedInputStream(new FileInputStream(file)), - null /* shortcuts */, null /* bigrams */); - } - final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); + final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file); if (null == decodedSpec) { throw new RuntimeException("Does not seem to be a dictionary file " + filename); } - if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) { + if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) { if (report) { System.out.println("Format : Combined format"); System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); + System.out.println("Uncompressed size : " + decodedSpec.mResult.length()); } try (final BufferedReader reader = new BufferedReader( - new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) { + new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) { return CombinedInputOutput.readDictionaryCombined(reader); } } final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder( - decodedSpec.mFile, 0, decodedSpec.mFile.length(), + decodedSpec.mResult, 0, decodedSpec.mResult.length(), DictDecoder.USE_BYTEARRAY); if (report) { System.out.println("Format : Binary dictionary format"); System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); + System.out.println("Uncompressed size : " + decodedSpec.mResult.length()); } return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); - } catch (final IOException | SAXException | ParserConfigurationException | - UnsupportedFormatException e) { + } catch (final IOException | UnsupportedFormatException e) { throw new RuntimeException("Can't read file " + filename, e); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 23cbee81c..955c5728c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -98,6 +98,7 @@ public class CombinedInputOutput { String word = null; ProbabilityInfo probabilityInfo = new ProbabilityInfo(0); boolean isNotAWord = false; + boolean isPossiblyOffensive = false; ArrayList<WeightedString> bigrams = new ArrayList<>(); ArrayList<WeightedString> shortcuts = new ArrayList<>(); while (null != (line = reader.readLine())) { @@ -106,7 +107,7 @@ public class CombinedInputOutput { if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, - isNotAWord); + isNotAWord, isPossiblyOffensive); for (WeightedString s : bigrams) { dict.setBigram(word, s.mWord, s.mProbabilityInfo); } @@ -114,27 +115,37 @@ public class CombinedInputOutput { if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>(); if (!bigrams.isEmpty()) bigrams = new ArrayList<>(); isNotAWord = false; + isPossiblyOffensive = false; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { - word = params[1]; - } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { - probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), - probabilityInfo.mTimestamp, probabilityInfo.mLevel, - probabilityInfo.mCount); - } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { - final String[] historicalInfoParams = - params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); - if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { - throw new RuntimeException("Wrong format (historical info) : " + line); - } - probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, - Integer.parseInt(historicalInfoParams[0]), - Integer.parseInt(historicalInfoParams[1]), - Integer.parseInt(historicalInfoParams[2])); - } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { - isNotAWord = "true".equals(params[1]); + switch (params[0]) { + case CombinedFormatUtils.WORD_TAG: + word = params[1]; + break; + case CombinedFormatUtils.PROBABILITY_TAG: + probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + probabilityInfo.mTimestamp, probabilityInfo.mLevel, + probabilityInfo.mCount); + break; + case CombinedFormatUtils.HISTORICAL_INFO_TAG: + final String[] historicalInfoParams = params[1].split( + CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + + line); + } + probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); + break; + case CombinedFormatUtils.NOT_A_WORD_TAG: + isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]); + break; + case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG: + isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]); + break; } } } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) { @@ -189,7 +200,8 @@ public class CombinedInputOutput { } } if (null != word) { - dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); + dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord, + isPossiblyOffensive); for (WeightedString s : bigrams) { dict.setBigram(word, s.mWord, s.mProbabilityInfo); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java index 0d93c7fa9..8fdf7633f 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java @@ -18,7 +18,9 @@ package com.android.inputmethod.latin.dicttool; public class CommandList { public static void populate() { + // TODO: Move some commands to native code. Dicttool.addCommand("info", Info.class); + Dicttool.addCommand("header", Header.class); Dicttool.addCommand("diff", Diff.class); Dicttool.addCommand("compress", Compress.Compressor.class); Dicttool.addCommand("uncompress", Compress.Uncompressor.class); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 3d0557b5c..6187853c8 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -27,9 +27,6 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.Ver2DictEncoder; import com.android.inputmethod.latin.makedict.Ver4DictEncoder; -import org.xml.sax.SAXException; - -import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -37,13 +34,10 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.util.Arrays; import java.util.LinkedList; -import javax.xml.parsers.ParserConfigurationException; - /** * Main class/method for DictionaryMaker. */ @@ -53,59 +47,37 @@ public class DictionaryMaker { private static final String OPTION_VERSION_2 = "-2"; private static final String OPTION_VERSION_4 = "-4"; private static final String OPTION_INPUT_SOURCE = "-s"; - private static final String OPTION_INPUT_BIGRAM_XML = "-b"; - private static final String OPTION_INPUT_SHORTCUT_XML = "-c"; private static final String OPTION_OUTPUT_BINARY = "-d"; - private static final String OPTION_OUTPUT_XML = "-x"; private static final String OPTION_OUTPUT_COMBINED = "-o"; private static final String OPTION_HELP = "-h"; + private static final String OPTION_CODE_POINT_TABLE = "-t"; + private static final String OPTION_CODE_POINT_TABLE_OFF = "off"; + private static final String OPTION_CODE_POINT_TABLE_ON = "on"; public final String mInputBinary; public final String mInputCombined; - public final String mInputUnigramXml; - public final String mInputShortcutXml; - public final String mInputBigramXml; public final String mOutputBinary; - public final String mOutputXml; public final String mOutputCombined; public final int mOutputBinaryFormatVersion; + public final int mCodePointTableMode; private void checkIntegrity() throws IOException { checkHasExactlyOneInput(); checkHasAtLeastOneOutput(); checkNotSameFile(mInputBinary, mOutputBinary); - checkNotSameFile(mInputBinary, mOutputXml); checkNotSameFile(mInputCombined, mOutputBinary); - checkNotSameFile(mInputCombined, mOutputXml); - checkNotSameFile(mInputUnigramXml, mOutputBinary); - checkNotSameFile(mInputUnigramXml, mOutputXml); - checkNotSameFile(mInputUnigramXml, mOutputCombined); - checkNotSameFile(mInputShortcutXml, mOutputBinary); - checkNotSameFile(mInputShortcutXml, mOutputXml); - checkNotSameFile(mInputShortcutXml, mOutputCombined); - checkNotSameFile(mInputBigramXml, mOutputBinary); - checkNotSameFile(mInputBigramXml, mOutputXml); - checkNotSameFile(mInputBigramXml, mOutputCombined); - checkNotSameFile(mOutputBinary, mOutputXml); checkNotSameFile(mOutputBinary, mOutputCombined); - checkNotSameFile(mOutputXml, mOutputCombined); } private void checkHasExactlyOneInput() { - if (null == mInputUnigramXml && null == mInputBinary && null == mInputCombined) { + if (null == mInputBinary && null == mInputCombined) { throw new RuntimeException("No input file specified"); - } else if ((null != mInputUnigramXml && null != mInputBinary) - || (null != mInputUnigramXml && null != mInputCombined) - || (null != mInputBinary && null != mInputCombined)) { + } else if (null != mInputBinary && null != mInputCombined) { throw new RuntimeException("Several input files specified"); - } else if ((null != mInputBinary || null != mInputCombined) - && (null != mInputBigramXml || null != mInputShortcutXml)) { - throw new RuntimeException("Separate bigrams/shortcut files are only supported" - + " with XML input (other formats include bigrams and shortcuts already)"); } } private void checkHasAtLeastOneOutput() { - if (null == mOutputBinary && null == mOutputXml && null == mOutputCombined) { + if (null == mOutputBinary && null == mOutputCombined) { throw new RuntimeException("No output specified"); } } @@ -122,22 +94,20 @@ public class DictionaryMaker { } } - private void displayHelp() { + private static void displayHelp() { MakedictLog.i(getHelp()); } public static String getHelp() { return "Usage: makedict " - + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] " + "| [-s <combined format input]" - + "| [-s <binary input>] [-d <binary output>] [-x <xml output>] " - + " [-o <combined output>]" + + "| [-s <binary input>] [-d <binary output>]" + + " [-o <combined output>] [-t <code point table switch: on/off/auto>]" + "[-2] [-3] [-4]\n" + "\n" + " Converts a source dictionary file to one or several outputs.\n" - + " Source can be an XML file, with an optional XML bigrams file, or a\n" - + " binary dictionary file.\n" - + " Binary version 2 (Jelly Bean), 3, 4, XML and\n" + + " Source can be a binary dictionary file or a combined format file.\n" + + " Binary version 2 (Jelly Bean), 3, 4, and\n" + " combined format outputs are supported."; } @@ -148,13 +118,11 @@ public class DictionaryMaker { } String inputBinary = null; String inputCombined = null; - String inputUnigramXml = null; - String inputShortcutXml = null; - String inputBigramXml = null; String outputBinary = null; - String outputXml = null; String outputCombined = null; - int outputBinaryFormatVersion = 2; // the default version is 2. + int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202. + // Don't use code point table by default. + int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF; while (!args.isEmpty()) { final String arg = args.get(0); @@ -172,41 +140,42 @@ public class DictionaryMaker { throw new IllegalArgumentException("Option " + arg + " is unknown or " + "requires an argument"); } - String filename = args.get(0); + String argValue = args.get(0); args.remove(0); if (OPTION_INPUT_SOURCE.equals(arg)) { - if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { - inputUnigramXml = filename; - } else if (CombinedInputOutput.isCombinedDictionary(filename)) { - inputCombined = filename; - } else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) { - inputBinary = filename; + if (CombinedInputOutput.isCombinedDictionary(argValue)) { + inputCombined = argValue; + } else if (BinaryDictDecoderUtils.isBinaryDictionary(argValue)) { + inputBinary = argValue; } else { throw new IllegalArgumentException( - "Unknown format for file " + filename); + "Unknown format for file " + argValue); } - } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) { - inputShortcutXml = filename; - } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) { - inputBigramXml = filename; } else if (OPTION_OUTPUT_BINARY.equals(arg)) { - outputBinary = filename; - } else if (OPTION_OUTPUT_XML.equals(arg)) { - outputXml = filename; + outputBinary = argValue; } else if (OPTION_OUTPUT_COMBINED.equals(arg)) { - outputCombined = filename; + outputCombined = argValue; + } else if (OPTION_CODE_POINT_TABLE.equals(arg)) { + if (OPTION_CODE_POINT_TABLE_OFF.equals(argValue)) { + codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF; + } else if (OPTION_CODE_POINT_TABLE_ON.equals(argValue)) { + codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_ON; + } else { + throw new IllegalArgumentException( + "Unknown argument to -t option : " + argValue); + } } else { throw new IllegalArgumentException("Unknown option : " + arg); } } } else { - if (null == inputBinary && null == inputUnigramXml) { + if (null == inputBinary) { if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) { inputBinary = arg; } else if (CombinedInputOutput.isCombinedDictionary(arg)) { inputCombined = arg; } else { - inputUnigramXml = arg; + throw new IllegalArgumentException("Unknown format for file " + arg); } } else if (null == outputBinary) { outputBinary = arg; @@ -218,20 +187,16 @@ public class DictionaryMaker { mInputBinary = inputBinary; mInputCombined = inputCombined; - mInputUnigramXml = inputUnigramXml; - mInputShortcutXml = inputShortcutXml; - mInputBigramXml = inputBigramXml; mOutputBinary = outputBinary; - mOutputXml = outputXml; mOutputCombined = outputCombined; mOutputBinaryFormatVersion = outputBinaryFormatVersion; + mCodePointTableMode = codePointTableMode; checkIntegrity(); } } public static void main(String[] args) - throws FileNotFoundException, ParserConfigurationException, SAXException, IOException, - UnsupportedFormatException { + throws FileNotFoundException, IOException, UnsupportedFormatException { final Arguments parsedArgs = new Arguments(args); FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs); writeOutputToParsedArgs(parsedArgs, dictionary); @@ -244,14 +209,11 @@ public class DictionaryMaker { * @return the read dictionary. */ private static FusionDictionary readInputFromParsedArgs(final Arguments args) - throws IOException, UnsupportedFormatException, ParserConfigurationException, - SAXException, FileNotFoundException { + throws IOException, UnsupportedFormatException, FileNotFoundException { if (null != args.mInputBinary) { return readBinaryFile(args.mInputBinary); } else if (null != args.mInputCombined) { return readCombinedFile(args.mInputCombined); - } else if (null != args.mInputUnigramXml) { - return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml); } else { throw new RuntimeException("No input file specified"); } @@ -290,38 +252,6 @@ public class DictionaryMaker { } } - private static BufferedInputStream getBufferedFileInputStream(final String filename) - throws FileNotFoundException { - if (filename == null) { - return null; - } - return new BufferedInputStream(new FileInputStream(filename)); - } - - /** - * Read a dictionary from a unigram XML file, and optionally a bigram XML file. - * - * @param unigramXmlFilename the name of the unigram XML file. May not be null. - * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none. - * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams. - * @return the read dictionary. - * @throws FileNotFoundException if one of the files can't be found - * @throws SAXException if one or more of the XML files is not well-formed - * @throws IOException if one the input files can't be read - * @throws ParserConfigurationException if the system can't create a SAX parser - */ - private static FusionDictionary readXmlFile(final String unigramXmlFilename, - final String shortcutXmlFilename, final String bigramXmlFilename) - throws FileNotFoundException, SAXException, IOException, ParserConfigurationException { - try ( - final BufferedInputStream unigrams = getBufferedFileInputStream(unigramXmlFilename); - final BufferedInputStream shortcuts = getBufferedFileInputStream(shortcutXmlFilename); - final BufferedInputStream bigrams = getBufferedFileInputStream(bigramXmlFilename); - ) { - return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams); - } - } - /** * Invoke the right output method according to args. * @@ -335,10 +265,8 @@ public class DictionaryMaker { throws FileNotFoundException, IOException, UnsupportedFormatException, IllegalArgumentException { if (null != args.mOutputBinary) { - writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion); - } - if (null != args.mOutputXml) { - writeXmlDictionary(args.mOutputXml, dict); + writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion, + args.mCodePointTableMode); } if (null != args.mOutputCombined) { writeCombinedDictionary(args.mOutputCombined, dict); @@ -351,39 +279,26 @@ public class DictionaryMaker { * @param outputFilename the name of the file to write to. * @param dict the dictionary to write. * @param version the binary format version to use. + * @param codePointTableMode the value to decide how we treat the code point table. * @throws FileNotFoundException if the output file can't be created. * @throws IOException if the output file can't be written to. */ private static void writeBinaryDictionary(final String outputFilename, - final FusionDictionary dict, final int version) + final FusionDictionary dict, final int version, final int codePointTableMode) throws FileNotFoundException, IOException, UnsupportedFormatException { final File outputFile = new File(outputFilename); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version); final DictEncoder dictEncoder; if (version == FormatSpec.VERSION4) { + // VERSION4 doesn't use the code point table. dictEncoder = new Ver4DictEncoder(outputFile); } else { - dictEncoder = new Ver2DictEncoder(outputFile); + dictEncoder = new Ver2DictEncoder(outputFile, codePointTableMode); } dictEncoder.writeDictionary(dict, formatOptions); } /** - * Write the dictionary in XML format to the specified filename. - * - * @param outputFilename the name of the file to write to. - * @param dict the dictionary to write. - * @throws FileNotFoundException if the output file can't be created. - * @throws IOException if the output file can't be written to. - */ - private static void writeXmlDictionary(final String outputFilename, - final FusionDictionary dict) throws FileNotFoundException, IOException { - try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) { - XmlDictInputOutput.writeDictionaryXml(writer, dict); - } - } - - /** * Write the dictionary in the combined format to the specified filename. * * @param outputFilename the name of the file to write to. diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 94d1ae8bb..f97fbef2c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -128,14 +128,14 @@ public class Diff extends Dicttool.Command { + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord()); hasDifferences = true; } - if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) { - System.out.println("Blacklist: " + word0Property.mWord + " " - + word0Property.mIsBlacklistEntry + " -> " - + word1PtNode.getIsBlacklistEntry()); + if (word0Property.mIsPossiblyOffensive != word1PtNode.getIsPossiblyOffensive()) { + System.out.println("Possibly-offensive: " + word0Property.mWord + " " + + word0Property.mIsPossiblyOffensive + " -> " + + word1PtNode.getIsPossiblyOffensive()); hasDifferences = true; } hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, - "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams()); hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, "Shortcut", word0Property.mShortcutTargets, word1PtNode.getShortcutTargets()); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java new file mode 100644 index 000000000..aa1762ff1 --- /dev/null +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.dicttool; + +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; + +import java.io.File; +import java.util.Arrays; + +public class Header extends Dicttool.Command { + public static final String COMMAND = "header"; + + public Header() { + } + + @Override + public String getHelp() { + return COMMAND + " <filename>: prints the header contents of a dictionary file"; + } + + @Override + public void run() throws UnsupportedFormatException { + final boolean plumbing; + if (mArgs.length > 0 && "-p".equals(mArgs[0])) { + plumbing = true; + mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length); + } else { + plumbing = false; + } + if (mArgs.length < 1) { + throw new RuntimeException("Not enough arguments for command " + COMMAND); + } + final String filename = mArgs[0]; + final File dictFile = new File(filename); + final DecoderChainSpec<DictionaryHeader> spec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile, + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); + if (null == spec) { + throw new UnsupportedFormatException(filename + + " doesn't seem to be a valid version 2 dictionary file"); + } + + final DictionaryHeader header = spec.mResult; + System.out.println("Dictionary : " + dictFile.getAbsolutePath()); + System.out.println("Size : " + dictFile.length() + " bytes"); + System.out.println("Format : Binary dictionary format"); + System.out.println("Format version : " + header.mFormatOptions.mVersion); + System.out.println("Packaging : " + spec.describeChain()); + System.out.println("Header attributes :"); + System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing)); + } +} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 9b2567fd3..b8a64e31a 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -45,8 +45,8 @@ public class Info extends Dicttool.Command { int whitelistCount = 0; for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != wordProperty.mBigrams) { - bigramCount += wordProperty.mBigrams.size(); + if (wordProperty.mHasNgrams) { + bigramCount += wordProperty.mNgrams.size(); } if (null != wordProperty.mShortcutTargets) { shortcutCount += wordProperty.mShortcutTargets.size(); @@ -64,8 +64,7 @@ public class Info extends Dicttool.Command { + " whitelist entries)"); } - private static void showWordInfo(final FusionDictionary dict, final String word, - final boolean plumbing) { + private static void showWordInfo(final FusionDictionary dict, final String word) { final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); if (null == ptNode) { System.out.println(word + " is not in the dictionary"); @@ -76,8 +75,8 @@ public class Info extends Dicttool.Command { if (ptNode.getIsNotAWord()) { System.out.println(" Is not a word"); } - if (ptNode.getIsBlacklistEntry()) { - System.out.println(" Is a blacklist entry"); + if (ptNode.getIsPossiblyOffensive()) { + System.out.println(" Is possibly offensive"); } final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets(); if (null == shortcutTargets || shortcutTargets.isEmpty()) { @@ -124,7 +123,7 @@ public class Info extends Dicttool.Command { showInfo(dict, plumbing); } else { for (int i = 1; i < mArgs.length; ++i) { - showWordInfo(dict, mArgs[i], plumbing); + showWordInfo(dict, mArgs[i]); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java index 808e1d4c8..0b1fb88bc 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java @@ -20,8 +20,6 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import java.io.FileNotFoundException; import java.io.IOException; -import javax.xml.parsers.ParserConfigurationException; -import org.xml.sax.SAXException; public class Makedict extends Dicttool.Command { public static final String COMMAND = "makedict"; @@ -35,8 +33,7 @@ public class Makedict extends Dicttool.Command { } @Override - public void run() throws FileNotFoundException, IOException, ParserConfigurationException, - SAXException, UnsupportedFormatException { + public void run() throws FileNotFoundException, IOException, UnsupportedFormatException { DictionaryMaker.main(mArgs); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java index 1f6798269..3efa10a80 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java @@ -16,10 +16,10 @@ package com.android.inputmethod.latin.dicttool; -import java.io.BufferedInputStream; +import com.android.inputmethod.latin.makedict.DictionaryHeader; + import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -79,16 +79,16 @@ public class Package { if (mArgs.length != 2) { throw new RuntimeException("Too many/too few arguments for command " + COMMAND); } - final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec = - BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0])); + final BinaryDictOffdeviceUtils.DecoderChainSpec<DictionaryHeader> decodedSpec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(new File(mArgs[0]), + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); if (null == decodedSpec) { System.out.println(mArgs[0] + " does not seem to be a dictionary"); return; } System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); try ( - final InputStream input = getFileInputStream(decodedSpec.mFile); + final InputStream input = decodedSpec.getStream(new File(mArgs[0])); final OutputStream output = new BufferedOutputStream( getFileOutputStreamOrStdOut(mArgs[1])) ) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java index b6383d788..e2dd5199b 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java @@ -16,10 +16,10 @@ package com.android.inputmethod.latin.dicttool; +import com.android.inputmethod.latin.common.FileUtils; import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests; import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests; import com.android.inputmethod.latin.makedict.FusionDictionaryTest; -import com.android.inputmethod.latin.utils.FileUtils; import java.io.File; import java.io.IOException; diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java deleted file mode 100644 index bdec44761..000000000 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.latin.dicttool; - -import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.ProbabilityInfo; -import com.android.inputmethod.latin.makedict.WeightedString; -import com.android.inputmethod.latin.makedict.WordProperty; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.TreeSet; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -/** - * Reads and writes XML files for a FusionDictionary. - * - * All functions in this class are static. - */ -public class XmlDictInputOutput { - - private static final String ROOT_TAG = "wordlist"; - private static final String WORD_TAG = "w"; - private static final String BIGRAM_TAG = "bigram"; - private static final String SHORTCUT_TAG = "shortcut"; - private static final String PROBABILITY_ATTR = "f"; - private static final String WORD_ATTR = "word"; - private static final String NOT_A_WORD_ATTR = "not_a_word"; - - /** - * SAX handler for a unigram XML file. - */ - static private class UnigramHandler extends DefaultHandler { - // Parser states - private static final int START = 1; - private static final int WORD = 2; - private static final int UNKNOWN = 3; - private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1; - - FusionDictionary mDictionary; - int mState; // the state of the parser - int mFreq; // the currently read freq - String mWord; // the current word - final HashMap<String, ArrayList<WeightedString>> mShortcutsMap; - - /** - * Create the handler. - * - * @param shortcuts the shortcuts as a map. This may be empty, but may not be null. - */ - public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) { - mDictionary = null; - mShortcutsMap = shortcuts; - mWord = ""; - mState = START; - mFreq = 0; - } - - public FusionDictionary getFinalDictionary() { - final FusionDictionary dict = mDictionary; - for (final String shortcutOnly : mShortcutsMap.keySet()) { - if (dict.hasWord(shortcutOnly)) continue; - dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY), - mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); - } - mDictionary = null; - mShortcutsMap.clear(); - mWord = ""; - mState = START; - mFreq = 0; - return dict; - } - - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { - if (WORD_TAG.equals(localName)) { - mState = WORD; - mWord = ""; - for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { - final String attrName = attrs.getLocalName(attrIndex); - if (PROBABILITY_ATTR.equals(attrName)) { - mFreq = Integer.parseInt(attrs.getValue(attrIndex)); - } - } - } else if (ROOT_TAG.equals(localName)) { - final HashMap<String, String> attributes = new HashMap<>(); - for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { - final String attrName = attrs.getLocalName(attrIndex); - attributes.put(attrName, attrs.getValue(attrIndex)); - } - mDictionary = new FusionDictionary(new PtNodeArray(), - new DictionaryOptions(attributes)); - } else { - mState = UNKNOWN; - } - } - - @Override - public void characters(char[] ch, int start, int length) { - if (WORD == mState) { - // The XML parser is free to return text in arbitrary chunks one after the - // other. In particular, this happens in some implementations when it finds - // an escape code like "&". - mWord += String.copyValueOf(ch, start, length); - } - } - - @Override - public void endElement(String uri, String localName, String qName) { - if (WORD == mState) { - mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord), - false /* isNotAWord */); - mState = START; - } - } - } - - static private class AssociativeListHandler extends DefaultHandler { - private final String SRC_TAG; - private final String SRC_ATTRIBUTE; - private final String DST_TAG; - private final String DST_ATTRIBUTE; - private final String DST_FREQ; - - // In this version of the XML file, the bigram frequency is given as an int 0..XML_MAX - private final static int XML_MAX = 256; - // In memory and in the binary dictionary the bigram frequency is 0..MEMORY_MAX - private final static int MEMORY_MAX = 256; - private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX; - - private String mSrc; - private final HashMap<String, ArrayList<WeightedString>> mAssocMap; - - public AssociativeListHandler(final String srcTag, final String srcAttribute, - final String dstTag, final String dstAttribute, final String dstFreq) { - SRC_TAG = srcTag; - SRC_ATTRIBUTE = srcAttribute; - DST_TAG = dstTag; - DST_ATTRIBUTE = dstAttribute; - DST_FREQ = dstFreq; - mSrc = null; - mAssocMap = new HashMap<>(); - } - - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) { - if (SRC_TAG.equals(localName)) { - mSrc = attrs.getValue(uri, SRC_ATTRIBUTE); - } else if (DST_TAG.equals(localName)) { - String dst = attrs.getValue(uri, DST_ATTRIBUTE); - int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ)); - WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO); - ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc); - if (null == bigramList) bigramList = new ArrayList<>(); - bigramList.add(bigram); - mAssocMap.put(mSrc, bigramList); - } - } - - protected int getValueFromFreqString(final String freqString) { - return Integer.parseInt(freqString); - } - - // This may return an empty map, but will never return null. - public HashMap<String, ArrayList<WeightedString>> getAssocMap() { - return mAssocMap; - } - } - - /** - * SAX handler for a bigram XML file. - */ - static private class BigramHandler extends AssociativeListHandler { - private final static String BIGRAM_W1_TAG = "bi"; - private final static String BIGRAM_W2_TAG = "w"; - private final static String BIGRAM_W1_ATTRIBUTE = "w1"; - private final static String BIGRAM_W2_ATTRIBUTE = "w2"; - private final static String BIGRAM_FREQ_ATTRIBUTE = "p"; - - public BigramHandler() { - super(BIGRAM_W1_TAG, BIGRAM_W1_ATTRIBUTE, BIGRAM_W2_TAG, BIGRAM_W2_ATTRIBUTE, - BIGRAM_FREQ_ATTRIBUTE); - } - - // As per getAssocMap(), this never returns null. - public HashMap<String, ArrayList<WeightedString>> getBigramMap() { - return getAssocMap(); - } - } - - /** - * SAX handler for a shortcut & whitelist XML file. - */ - static private class ShortcutAndWhitelistHandler extends AssociativeListHandler { - private final static String ENTRY_TAG = "entry"; - private final static String ENTRY_ATTRIBUTE = "shortcut"; - private final static String TARGET_TAG = "target"; - private final static String REPLACEMENT_ATTRIBUTE = "replacement"; - private final static String TARGET_PRIORITY_ATTRIBUTE = "priority"; - private final static String WHITELIST_MARKER = "whitelist"; - private final static int WHITELIST_FREQ_VALUE = 15; - private final static int MIN_FREQ = 0; - private final static int MAX_FREQ = 14; - - public ShortcutAndWhitelistHandler() { - super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE, - TARGET_PRIORITY_ATTRIBUTE); - } - - @Override - protected int getValueFromFreqString(final String freqString) { - if (WHITELIST_MARKER.equals(freqString)) { - return WHITELIST_FREQ_VALUE; - } - final int intValue = super.getValueFromFreqString(freqString); - if (intValue < MIN_FREQ || intValue > MAX_FREQ) { - throw new RuntimeException("Shortcut freq out of range. Accepted range is " - + MIN_FREQ + ".." + MAX_FREQ); - } - return intValue; - } - - // As per getAssocMap(), this never returns null. - public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() { - return getAssocMap(); - } - } - - /** - * Basic test to find out whether the file is in the unigram XML format or not. - * - * Concretely this only tests the header line. - * - * @param filename The name of the file to test. - * @return true if the file is in the unigram XML format, false otherwise - */ - public static boolean isXmlUnigramDictionary(final String filename) { - try (final BufferedReader reader = new BufferedReader( - new InputStreamReader(new FileInputStream(filename), "UTF-8"))) { - final String firstLine = reader.readLine(); - return firstLine.matches("^\\s*<wordlist .*>\\s*$"); - } catch (final IOException e) { - return false; - } - } - - /** - * Reads a dictionary from an XML file. - * - * This is the public method that will parse an XML file and return the corresponding memory - * representation. - * - * @param unigrams the file to read the data from. - * @param shortcuts the file to read the shortcuts & whitelist from, or null. - * @param bigrams the file to read the bigrams from, or null. - * @return the in-memory representation of the dictionary. - */ - public static FusionDictionary readDictionaryXml(final BufferedInputStream unigrams, - final BufferedInputStream shortcuts, final BufferedInputStream bigrams) - throws SAXException, IOException, ParserConfigurationException { - final SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - final SAXParser parser = factory.newSAXParser(); - final BigramHandler bigramHandler = new BigramHandler(); - if (null != bigrams) parser.parse(bigrams, bigramHandler); - - final ShortcutAndWhitelistHandler shortcutAndWhitelistHandler = - new ShortcutAndWhitelistHandler(); - if (null != shortcuts) parser.parse(shortcuts, shortcutAndWhitelistHandler); - - final UnigramHandler unigramHandler = - new UnigramHandler(shortcutAndWhitelistHandler.getShortcutAndWhitelistMap()); - parser.parse(unigrams, unigramHandler); - final FusionDictionary dict = unigramHandler.getFinalDictionary(); - final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap(); - for (final String firstWord : bigramMap.keySet()) { - if (!dict.hasWord(firstWord)) continue; - final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); - for (final WeightedString bigram : bigramList) { - if (!dict.hasWord(bigram.mWord)) continue; - dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo); - } - } - return dict; - } - - /** - * Reads a dictionary in the first, legacy XML format - * - * This method reads data from the parser and creates a new FusionDictionary with it. - * The format parsed by this method is the format used before Ice Cream Sandwich, - * which has no support for bigrams or shortcuts/whitelist. - * It is important to note that this method expects the parser to have already eaten - * the first, all-encompassing tag. - * - * @param xpp the parser to read the data from. - * @return the parsed dictionary. - */ - - /** - * Writes a dictionary to an XML file. - * - * The output format is the "second" format, which supports bigrams and shortcuts/whitelist. - * - * @param destination a destination stream to write to. - * @param dict the dictionary to write. - */ - public static void writeDictionaryXml(final BufferedWriter destination, - final FusionDictionary dict) throws IOException { - final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>(); - for (WordProperty wordProperty : dict) { - wordPropertiesInDict.add(wordProperty); - } - // TODO: use an XMLSerializer if this gets big - destination.write("<wordlist format=\"2\""); - for (final String key : dict.mOptions.mAttributes.keySet()) { - final String value = dict.mOptions.mAttributes.get(key); - destination.write(" " + key + "=\"" + value + "\""); - } - destination.write(">\n"); - destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); - for (WordProperty wordProperty : wordPropertiesInDict) { - destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord - + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() - + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") - + "\">"); - if (null != wordProperty.mShortcutTargets) { - destination.write("\n"); - for (WeightedString target : wordProperty.mShortcutTargets) { - destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" - + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG - + ">\n"); - } - destination.write(" "); - } - if (null != wordProperty.mBigrams) { - destination.write("\n"); - for (WeightedString bigram : wordProperty.mBigrams) { - destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" - + bigram.getProbability() + "\">" + bigram.mWord - + "</" + BIGRAM_TAG + ">\n"); - } - destination.write(" "); - } - destination.write("</" + WORD_TAG + ">\n"); - } - destination.write("</wordlist>\n"); - destination.close(); - } -} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java index a4ad6b514..dcc4e1972 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java @@ -16,6 +16,7 @@ package com.android.inputmethod.latin.personalization; +@SuppressWarnings("unused") public class PersonalizationHelper { public static void currentTimeChangedForTesting(final int currentTimestamp) { } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index 0236a446d..e68aeb0eb 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -16,10 +16,17 @@ package com.android.inputmethod.latin.dicttool; +import com.android.inputmethod.latin.common.CodePointUtils; +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils; +import com.android.inputmethod.latin.dicttool.Compress; +import com.android.inputmethod.latin.dicttool.Crypt; +import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; +import com.android.inputmethod.latin.makedict.BinaryDictUtils; import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -35,13 +42,37 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; /** * Unit tests for BinaryDictOffdeviceUtils */ public class BinaryDictOffdeviceUtilsTests extends TestCase { private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance + private static final int CODE_POINT_SET_SIZE = 300; + final Random mRandom; + private static final ArrayList<String> sWords = new ArrayList<>(); + + public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) { + super(); + mRandom = new Random(seed); + sWords.clear(); + generateWords(maxUnigrams, mRandom); + } + + private static void generateWords(final int maxUnigrams, final Random random) { + final int[] codePointSet = CodePointUtils.generateCodePointSet( + CODE_POINT_SET_SIZE, random); + final Set<String> wordSet = new HashSet<>(); + while (wordSet.size() < maxUnigrams) { + wordSet.add(CodePointUtils.generateWord(random, codePointSet)); + } + sWords.addAll(wordSet); + } public void testGetRawDictWorks() throws IOException, UnsupportedFormatException { final String VERSION = "1"; @@ -54,31 +85,31 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE); testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions); - dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */); - dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); final File dst = File.createTempFile("testGetRawDict", ".tmp"); dst.deleteOnExit(); try (final OutputStream out = Compress.getCompressedStream( - Compress.getCompressedStream( - Compress.getCompressedStream( - new BufferedOutputStream(new FileOutputStream(dst)))))) { + new BufferedOutputStream(new FileOutputStream(dst)))) { final DictEncoder dictEncoder = new Ver2DictEncoder(out); - dictEncoder.writeDictionary(dict, new FormatOptions(2, false)); + dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false)); } // Test for an actually compressed dictionary and its contents - final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec = + final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec = BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst); - for (final String step : decodeSpec.mDecoderSpec) { - assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step); - } - assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size()); - final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0, - decodeSpec.mFile.length()); + assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain()); + final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0, + decodeSpec.mResult.length()); final FusionDictionary resultDict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get( @@ -119,4 +150,63 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { assertNull("Wrongly identified data file", BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst)); } + + public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt) + throws IOException, UnsupportedFormatException { + final String dictName = "testHeaderReaderProcessor"; + final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; + final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5; + final HashMap<String, String> options = new HashMap<>(); + // Required attributes + options.put("dictionary", "main:en_US"); + options.put("locale", "en_US"); + options.put("version", Integer.toString(mRandom.nextInt())); + // Add some random options for test + final int numberOfOptionsToAdd = mRandom.nextInt() % (MAX_NUMBER_OF_OPTIONS_TO_ADD + 1); + for (int i = 0; i < numberOfOptionsToAdd; ++i) { + options.put(sWords.get(2 * i), sWords.get(2 * 1 + 1)); + } + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + new DictionaryOptions(options)); + + for (int i = 0; i < sWords.size(); ++i) { + final String word = sWords.get(i); + dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */, + false /* isNotAWord */, false /* isPossiblyOffensive */); + } + + File file = File.createTempFile(dictName, ".tmp"); + final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); + dictEncoder.writeDictionary(dict, formatOptions); + + if (compress) { + final File rawFile = file; + file = File.createTempFile(dictName + ".compress", ".tmp"); + final Compress.Compressor compressCommand = new Compress.Compressor(); + compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() }); + compressCommand.run(); + } + if (crypt) { + final File rawFile = file; + file = File.createTempFile(dictName + ".crypt", ".tmp"); + final Crypt.Encrypter cryptCommand = new Crypt.Encrypter(); + cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() }); + cryptCommand.run(); + } + + final DecoderChainSpec<DictionaryHeader> spec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file, + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); + assertNotNull("Can't decode a dictionary we just wrote : " + file, spec); + final DictionaryHeader header = spec.mResult; + assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""), + spec.describeChain()); + assertEquals(header.mDictionaryOptions.mAttributes, options); + } + + public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException { + runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */); + runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */); + runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */); + } } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java index aa228e72c..dc9981d1a 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java @@ -33,11 +33,16 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase { public void testFlattenNodes() { final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(new HashMap<String, String>())); - dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); - dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); + dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */, + false /* isPossiblyOffensive */); final ArrayList<PtNodeArray> result = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); assertEquals(4, result.size()); diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java index 71f8ac8d4..1a4f096e4 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java @@ -67,17 +67,18 @@ public class FusionDictionaryTest extends TestCase { } } - private void checkDictionary(final FusionDictionary dict, final ArrayList<String> words, - int limit) { + private static void checkDictionary(final FusionDictionary dict, final ArrayList<String> words, + final int limit) { assertNotNull(dict); + int count = limit; for (final String word : words) { - if (--limit < 0) return; + if (--count < 0) return; final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); assertNotNull(ptNode); } } - private String dumpWord(final String word) { + private static String dumpWord(final String word) { final StringBuilder sb = new StringBuilder(""); for (int i = 0; i < word.length(); i = word.offsetByCodePoints(i, 1)) { sb.append(word.codePointAt(i)); @@ -86,7 +87,7 @@ public class FusionDictionaryTest extends TestCase { return sb.toString(); } - private void dumpDict(final FusionDictionary dict) { + private static void dumpDict(final FusionDictionary dict) { for (WordProperty wordProperty : dict) { System.out.println("Word " + dumpWord(wordProperty.mWord)); } @@ -101,7 +102,8 @@ public class FusionDictionaryTest extends TestCase { prepare(time); for (int i = 0; i < sWords.size(); ++i) { System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i))); - dict.add(sWords.get(i), new ProbabilityInfo(180), null, false); + dict.add(sWords.get(i), new ProbabilityInfo(180), null, false, + false /* isPossiblyOffensive */); dumpDict(dict); checkDictionary(dict, sWords, i); } diff --git a/tools/make-keyboard-text/res/values-az-rAZ/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-az-rAZ/donottranslate-more-keys.xml index 54aa570b6..52fe5658c 100644 --- a/tools/make-keyboard-text/res/values-az-rAZ/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-az-rAZ/donottranslate-more-keys.xml @@ -18,10 +18,14 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX --> - <string name="morekeys_a">â</string> - <!-- U+0259: "ə" LATIN SMALL LETTER SCHWA --> - <string name="morekeys_e">ə</string> + <!-- This is the same as Turkish --> + <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX + U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE --> + <string name="morekeys_a">â,ä,á</string> + <!-- U+0259: "ə" LATIN SMALL LETTER SCHWA + U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE --> + <string name="morekeys_e">ə,é</string> <!-- U+0131: "ı" LATIN SMALL LETTER DOTLESS I U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS @@ -52,8 +56,15 @@ <string name="morekeys_s">ş,ß,ś,š</string> <!-- U+011F: "ğ" LATIN SMALL LETTER G WITH BREVE --> <string name="morekeys_g">ğ</string> + <!-- U+0148: "ň" LATIN SMALL LETTER N WITH CARON + U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE --> + <string name="morekeys_n">ň,ñ</string> <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE U+010D: "č" LATIN SMALL LETTER C WITH CARON --> <string name="morekeys_c">ç,ć,č</string> + <!-- U+00FD: "ý" LATIN SMALL LETTER Y WITH ACUTE --> + <string name="morekeys_y">ý</string> + <!-- U+017E: "ž" LATIN SMALL LETTER Z WITH CARON --> + <string name="morekeys_z">ž</string> </resources> diff --git a/tools/make-keyboard-text/res/values-bn-rBD/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-bn-rBD/donottranslate-more-keys.xml new file mode 100644 index 000000000..4955cd46a --- /dev/null +++ b/tools/make-keyboard-text/res/values-bn-rBD/donottranslate-more-keys.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2014, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- Label for "switch to alphabetic" key. + U+0995: "क" BENGALI LETTER KA + U+0996: "ख" BENGALI LETTER KHA + U+0997: "ग" BENGALI LETTER GA --> + <string name="keylabel_to_alpha">কখগ</string> + <!-- U+09F3: "৳" BENGALI RUPEE SIGN --> + <string name="keyspec_currency">৳</string> +</resources> diff --git a/tools/make-keyboard-text/res/values-da/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-da/donottranslate-more-keys.xml index c22e26275..98abb0586 100644 --- a/tools/make-keyboard-text/res/values-da/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-da/donottranslate-more-keys.xml @@ -18,26 +18,30 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE + <!-- U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE + U+00E6: "æ" LATIN SMALL LETTER AE + U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX U+00E3: "ã" LATIN SMALL LETTER A WITH TILDE U+0101: "ā" LATIN SMALL LETTER A WITH MACRON --> - <string name="morekeys_a">á,ä,à,â,ã,ā</string> + <string name="morekeys_a">å,æ,á,ä,à,â,ã,ā</string> <!-- U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE U+00EB: "ë" LATIN SMALL LETTER E WITH DIAERESIS --> <string name="morekeys_e">é,ë</string> <!-- U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS --> <string name="morekeys_i">í,ï</string> - <!-- U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE + <!-- U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE + U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS + U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE U+0153: "œ" LATIN SMALL LIGATURE OE U+014D: "ō" LATIN SMALL LETTER O WITH MACRON --> - <string name="morekeys_o">ó,ô,ò,õ,œ,ō</string> + <string name="morekeys_o">ø,ö,ó,ô,ò,õ,œ,ō</string> <!-- U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX diff --git a/tools/make-keyboard-text/res/values-et-rEE/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-et-rEE/donottranslate-more-keys.xml index 9a8fa3c59..79266e823 100644 --- a/tools/make-keyboard-text/res/values-et-rEE/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-et-rEE/donottranslate-more-keys.xml @@ -72,7 +72,6 @@ U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE U+0144: "ń" LATIN SMALL LETTER N WITH ACUTE --> <string name="morekeys_n">ņ,ñ,ń</string> - <!-- U+010D: "č" LATIN SMALL LETTER C WITH CARON U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE --> diff --git a/tools/make-keyboard-text/res/values-fi/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-fi/donottranslate-more-keys.xml index 82b847262..b06d9e49d 100644 --- a/tools/make-keyboard-text/res/values-fi/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-fi/donottranslate-more-keys.xml @@ -18,21 +18,24 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E6: "æ" LATIN SMALL LETTER AE + <!-- U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE + U+00E6: "æ" LATIN SMALL LETTER AE U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX U+00E3: "ã" LATIN SMALL LETTER A WITH TILDE U+0101: "ā" LATIN SMALL LETTER A WITH MACRON --> - <string name="morekeys_a">æ,à,á,â,ã,ā</string> - <!-- U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE + <string name="morekeys_a">ä,å,æ,à,á,â,ã,ā</string> + <!-- U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS + U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE U+0153: "œ" LATIN SMALL LIGATURE OE U+014D: "ō" LATIN SMALL LETTER O WITH MACRON --> - <string name="morekeys_o">ø,ô,ò,ó,õ,œ,ō</string> + <string name="morekeys_o">ö,ø,ô,ò,ó,õ,œ,ō</string> <!-- U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS --> <string name="morekeys_u">ü</string> <!-- U+0161: "š" LATIN SMALL LETTER S WITH CARON diff --git a/tools/make-keyboard-text/res/values-hi-rZZ/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-hi-rZZ/donottranslate-more-keys.xml new file mode 100644 index 000000000..50834e0fc --- /dev/null +++ b/tools/make-keyboard-text/res/values-hi-rZZ/donottranslate-more-keys.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2014, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- U+20B9: "₹" INDIAN RUPEE SIGN --> + <string name="keyspec_currency">₹</string> + <string name="label_go_key">Go</string> + <string name="label_send_key">Send</string> + <string name="label_next_key">Next</string> + <string name="label_done_key">Done</string> + <string name="label_search_key">Search</string> + <string name="label_previous_key">Prev</string> + <string name="label_pause_key">Pause</string> + <string name="label_wait_key">Wait</string> +</resources> diff --git a/tools/make-keyboard-text/res/values-hi/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-hi/donottranslate-more-keys.xml index 55723cdd1..2a37d8ba1 100644 --- a/tools/make-keyboard-text/res/values-hi/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-hi/donottranslate-more-keys.xml @@ -57,4 +57,9 @@ <string name="additional_morekeys_symbols_0">0</string> <!-- U+20B9: "₹" INDIAN RUPEE SIGN --> <string name="keyspec_currency">₹</string> + <!-- U+0964: "।" DEVANAGARI DANDA --> + <string name="keyspec_period">।</string> + <string name="keyspec_tablet_period">।</string> + <string name="morekeys_period">"!autoColumnOrder!9,\\,,.,?,!,#,),(,/,;,',@,:,-,\",+,\\%,&"</string> + <string name="morekeys_tablet_period">"!autoColumnOrder!8,\\,,.,',#,),(,/,;,@,:,-,\",+,\\%,&"</string> </resources> diff --git a/tools/make-keyboard-text/res/values-nb/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-nb/donottranslate-more-keys.xml index c5307a98d..37f9f8a1f 100644 --- a/tools/make-keyboard-text/res/values-nb/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-nb/donottranslate-more-keys.xml @@ -18,13 +18,15 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE + <!-- U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE + U+00E6: "æ" LATIN SMALL LETTER AE U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX U+00E3: "ã" LATIN SMALL LETTER A WITH TILDE U+0101: "ā" LATIN SMALL LETTER A WITH MACRON --> - <string name="morekeys_a">à,ä,á,â,ã,ā</string> + <string name="morekeys_a">å,æ,ä,à,á,â,ã,ā</string> <!-- U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE U+00EA: "ê" LATIN SMALL LETTER E WITH CIRCUMFLEX @@ -33,14 +35,15 @@ U+0117: "ė" LATIN SMALL LETTER E WITH DOT ABOVE U+0113: "ē" LATIN SMALL LETTER E WITH MACRON --> <string name="morekeys_e">é,è,ê,ë,ę,ė,ē</string> - <!-- U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX + <!-- U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE + U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS + U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE - U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE U+0153: "œ" LATIN SMALL LIGATURE OE U+014D: "ō" LATIN SMALL LETTER O WITH MACRON --> - <string name="morekeys_o">ô,ò,ó,ö,õ,œ,ō</string> + <string name="morekeys_o">ø,ö,ô,ò,ó,õ,œ,ō</string> <!-- U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX U+00F9: "ù" LATIN SMALL LETTER U WITH GRAVE diff --git a/tools/make-keyboard-text/res/values-ne-rNP/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-ne-rNP/donottranslate-more-keys.xml index 97c50d1b8..56b594f3a 100644 --- a/tools/make-keyboard-text/res/values-ne-rNP/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-ne-rNP/donottranslate-more-keys.xml @@ -57,4 +57,9 @@ <string name="additional_morekeys_symbols_0">0</string> <!-- U+0930/U+0941/U+002E "रु." NEPALESE RUPEE SIGN --> <string name="keyspec_currency">रु.</string> + <!-- U+0964: "।" DEVANAGARI DANDA --> + <string name="keyspec_period">।</string> + <string name="keyspec_tablet_period">।</string> + <string name="morekeys_period">"!autoColumnOrder!9,.,\\,,?,!,#,),(,/,;,',@,:,-,\",+,\\%,&"</string> + <string name="morekeys_tablet_period">"!autoColumnOrder!8,.,\\,,',#,),(,/,;,@,:,-,\",+,\\%,&"</string> </resources> diff --git a/tools/make-keyboard-text/res/values-ro/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-ro/donottranslate-more-keys.xml index 6286c7bae..834e03968 100644 --- a/tools/make-keyboard-text/res/values-ro/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-ro/donottranslate-more-keys.xml @@ -18,16 +18,16 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX + <!-- U+0103: "ă" LATIN SMALL LETTER A WITH BREVE + U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX U+00E3: "ã" LATIN SMALL LETTER A WITH TILDE - U+0103: "ă" LATIN SMALL LETTER A WITH BREVE U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS U+00E6: "æ" LATIN SMALL LETTER AE U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE U+0101: "ā" LATIN SMALL LETTER A WITH MACRON --> - <string name="morekeys_a">â,ã,ă,à,á,ä,æ,å,ā</string> + <string name="morekeys_a">ă,â,ã,à,á,ä,æ,å,ā</string> <!-- U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE diff --git a/tools/make-keyboard-text/res/values-sr-rZZ/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-sr-rZZ/donottranslate-more-keys.xml new file mode 100644 index 000000000..1168126ff --- /dev/null +++ b/tools/make-keyboard-text/res/values-sr-rZZ/donottranslate-more-keys.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2014, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE --> + <string name="morekeys_e">è</string> + <!-- U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE --> + <string name="morekeys_i">ì</string> + <!-- U+0161: "š" LATIN SMALL LETTER S WITH CARON --> + <string name="morekeys_s">š,%</string> + <!-- U+010D: "č" LATIN SMALL LETTER C WITH CARON + U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE --> + <string name="morekeys_c">č,ć,%</string> + <!-- U+0111: "đ" LATIN SMALL LETTER D WITH STROKE --> + <string name="morekeys_d">đ,%</string> + <!-- U+017E: "ž" LATIN SMALL LETTER Z WITH CARON --> + <string name="morekeys_z">ž,%</string> + <string name="label_go_key">"Idi"</string> + <string name="label_send_key">"Šalji"</string> + <string name="label_next_key">"Sled"</string> + <string name="label_done_key">"Gotov"</string> + <string name="label_search_key">"Traži"</string> + <string name="label_previous_key">"Preth"</string> + <string name="label_pause_key">"Pauza"</string> + <string name="label_wait_key">"Čekaj"</string> +</resources> diff --git a/tools/make-keyboard-text/res/values-sv/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-sv/donottranslate-more-keys.xml index ead514026..832e438f3 100644 --- a/tools/make-keyboard-text/res/values-sv/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-sv/donottranslate-more-keys.xml @@ -18,12 +18,15 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE + <!-- U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E5: "å" LATIN SMALL LETTER A WITH RING + U+00E6: "æ" LATIN SMALL LETTER AE + U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX U+0105: "ą" LATIN SMALL LETTER A WITH OGONEK U+00E3: "ã" LATIN SMALL LETTER A WITH TILDE --> - <string name="morekeys_a">á,à,â,ą,ã</string> + <string name="morekeys_a">ä,å,æ,á,à,â,ą,ã</string> <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE U+010D: "č" LATIN SMALL LETTER C WITH CARON --> @@ -48,12 +51,15 @@ U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE U+0148: "ň" LATIN SMALL LETTER N WITH CARON --> <string name="morekeys_n">ń,ñ,ň</string> - <!-- U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE + <!-- U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS + U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE + U+0153: "œ" LATIN SMALL LIGATURE OE + U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE U+014D: "ō" LATIN SMALL LETTER O WITH MACRON --> - <string name="morekeys_o">ó,ò,ô,õ,ō</string> + <string name="morekeys_o">ö,ø,œ,ó,ò,ô,õ,ō</string> <!-- U+0159: "ř" LATIN SMALL LETTER R WITH CARON --> <string name="morekeys_r">ř</string> <!-- U+015B: "ś" LATIN SMALL LETTER S WITH ACUTE diff --git a/tools/make-keyboard-text/res/values-tr/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-tr/donottranslate-more-keys.xml index db1108ff6..2398430e1 100644 --- a/tools/make-keyboard-text/res/values-tr/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-tr/donottranslate-more-keys.xml @@ -18,8 +18,13 @@ */ --> <resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> - <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX --> - <string name="morekeys_a">â</string> + <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX + U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE --> + <string name="morekeys_a">â,ä,á</string> + <!-- U+0259: "ə" LATIN SMALL LETTER SCHWA + U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE --> + <string name="morekeys_e">ə,é</string> <!-- U+0131: "ı" LATIN SMALL LETTER DOTLESS I U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS @@ -50,8 +55,15 @@ <string name="morekeys_s">ş,ß,ś,š</string> <!-- U+011F: "ğ" LATIN SMALL LETTER G WITH BREVE --> <string name="morekeys_g">ğ</string> + <!-- U+0148: "ň" LATIN SMALL LETTER N WITH CARON + U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE --> + <string name="morekeys_n">ň,ñ</string> <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE U+010D: "č" LATIN SMALL LETTER C WITH CARON --> <string name="morekeys_c">ç,ć,č</string> + <!-- U+00FD: "ý" LATIN SMALL LETTER Y WITH ACUTE --> + <string name="morekeys_y">ý</string> + <!-- U+017E: "ž" LATIN SMALL LETTER Z WITH CARON --> + <string name="morekeys_z">ž</string> </resources> diff --git a/tools/make-keyboard-text/res/values-uz-rUZ/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-uz-rUZ/donottranslate-more-keys.xml new file mode 100644 index 000000000..24dd091b5 --- /dev/null +++ b/tools/make-keyboard-text/res/values-uz-rUZ/donottranslate-more-keys.xml @@ -0,0 +1,70 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2014, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- This is the same as Turkish --> + <!-- U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX + U+00E4: "ä" LATIN SMALL LETTER A WITH DIAERESIS + U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE --> + <string name="morekeys_a">â,ä,á</string> + <!-- U+0259: "ə" LATIN SMALL LETTER SCHWA + U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE --> + <string name="morekeys_e">ə,é</string> + <!-- U+0131: "ı" LATIN SMALL LETTER DOTLESS I + U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX + U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS + U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE + U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE + U+012F: "į" LATIN SMALL LETTER I WITH OGONEK + U+012B: "ī" LATIN SMALL LETTER I WITH MACRON --> + <string name="morekeys_i">ı,î,ï,ì,í,į,ī</string> + <!-- U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS + U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX + U+0153: "œ" LATIN SMALL LIGATURE OE + U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE + U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE + U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE + U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE + U+014D: "ō" LATIN SMALL LETTER O WITH MACRON --> + <string name="morekeys_o">ö,ô,œ,ò,ó,õ,ø,ō</string> + <!-- U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS + U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX + U+00F9: "ù" LATIN SMALL LETTER U WITH GRAVE + U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE + U+016B: "ū" LATIN SMALL LETTER U WITH MACRON --> + <string name="morekeys_u">ü,û,ù,ú,ū</string> + <!-- U+015F: "ş" LATIN SMALL LETTER S WITH CEDILLA + U+00DF: "ß" LATIN SMALL LETTER SHARP S + U+015B: "ś" LATIN SMALL LETTER S WITH ACUTE + U+0161: "š" LATIN SMALL LETTER S WITH CARON --> + <string name="morekeys_s">ş,ß,ś,š</string> + <!-- U+011F: "ğ" LATIN SMALL LETTER G WITH BREVE --> + <string name="morekeys_g">ğ</string> + <!-- U+0148: "ň" LATIN SMALL LETTER N WITH CARON + U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE --> + <string name="morekeys_n">ň,ñ</string> + <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA + U+0107: "ć" LATIN SMALL LETTER C WITH ACUTE + U+010D: "č" LATIN SMALL LETTER C WITH CARON --> + <string name="morekeys_c">ç,ć,č</string> + <!-- U+00FD: "ý" LATIN SMALL LETTER Y WITH ACUTE --> + <string name="morekeys_y">ý</string> + <!-- U+017E: "ž" LATIN SMALL LETTER Z WITH CARON --> + <string name="morekeys_z">ž</string> +</resources> diff --git a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml index c4a1b889e..b6da7d13d 100644 --- a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml @@ -259,4 +259,12 @@ <string name="morekeys_double_quote">!fixedColumnOrder!5,!text/double_quotes,!text/double_angle_quotes</string> <string name="morekeys_tablet_double_quote">!fixedColumnOrder!6,!text/double_quotes,!text/single_quotes,!text/double_angle_quotes,!text/single_angle_quotes</string> <string name="keyspec_emoji_action_key">!icon/emoji_action_key|!code/key_emoji</string> + <string name="label_go_key">!string/label_go_key</string> + <string name="label_send_key">!string/label_send_key</string> + <string name="label_next_key">!string/label_next_key</string> + <string name="label_done_key">!string/label_done_key</string> + <string name="label_search_key">!string/label_search_key</string> + <string name="label_previous_key">!string/label_previous_key</string> + <string name="label_pause_key">!string/label_pause_key</string> + <string name="label_wait_key">!string/label_wait_key</string> </resources> |