aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-03-27 15:30:32 +0900
committerKeisuke Kuroyanagi <ksk@google.com>2014-03-27 15:30:32 +0900
commit93cda5bb396c22f1781e390debaf75d54cf7c0dc (patch)
tree391903a9f8875520980da23f1bc5911c8fa78fc2 /java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
parent37b9562fd7b593c90d7ab383ec650f39a7c0f621 (diff)
downloadlatinime-93cda5bb396c22f1781e390debaf75d54cf7c0dc.tar.gz
latinime-93cda5bb396c22f1781e390debaf75d54cf7c0dc.tar.xz
latinime-93cda5bb396c22f1781e390debaf75d54cf7c0dc.zip
Move code only used for dicttool and tests under tests.
Bug: 13035567 Change-Id: I13c6df013ef2b67c9bf67455d9c32d283bf9ea2e
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java365
1 files changed, 0 insertions, 365 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
deleted file mode 100644
index b534bcb09..000000000
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-/**
- * Decodes binary files for a FusionDictionary.
- *
- * All the methods in this class are static.
- *
- * TODO: Remove calls from classes except Ver3DictDecoder
- * TODO: Move this file to makedict/internal.
- * TODO: Rename this class to DictDecoderUtils.
- */
-public final class BinaryDictDecoderUtils {
-
- private static final boolean DBG = MakedictLog.DBG;
-
- private BinaryDictDecoderUtils() {
- // This utility class is not publicly instantiable.
- }
-
- @UsedForTesting
- public interface DictBuffer {
- public int readUnsignedByte();
- public int readUnsignedShort();
- public int readUnsignedInt24();
- public int readInt();
- public int position();
- public void position(int newPosition);
- @UsedForTesting
- public void put(final byte b);
- public int limit();
- @UsedForTesting
- public int capacity();
- }
-
- public static final class ByteBufferDictBuffer implements DictBuffer {
- private ByteBuffer mBuffer;
-
- public ByteBufferDictBuffer(final ByteBuffer buffer) {
- mBuffer = buffer;
- }
-
- @Override
- public int readUnsignedByte() {
- return mBuffer.get() & 0xFF;
- }
-
- @Override
- public int readUnsignedShort() {
- return mBuffer.getShort() & 0xFFFF;
- }
-
- @Override
- public int readUnsignedInt24() {
- final int retval = readUnsignedByte();
- return (retval << 16) + readUnsignedShort();
- }
-
- @Override
- public int readInt() {
- return mBuffer.getInt();
- }
-
- @Override
- public int position() {
- return mBuffer.position();
- }
-
- @Override
- public void position(int newPos) {
- mBuffer.position(newPos);
- }
-
- @Override
- public void put(final byte b) {
- mBuffer.put(b);
- }
-
- @Override
- public int limit() {
- return mBuffer.limit();
- }
-
- @Override
- public int capacity() {
- return mBuffer.capacity();
- }
- }
-
- /**
- * A class grouping utility function for our specific character encoding.
- */
- static final class CharEncoding {
- private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
- private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
-
- /**
- * Helper method to find out whether this code fits on one byte
- */
- private static boolean fitsOnOneByte(final int character) {
- return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
- && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
- }
-
- /**
- * Compute the size of a character given its character code.
- *
- * Char format is:
- * 1 byte = bbbbbbbb match
- * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
- * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
- * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
- * 00011111 would be outside unicode.
- * else: iso-latin-1 code
- * This allows for the whole unicode range to be encoded, including chars outside of
- * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
- * characters which should never happen anyway (and still work, but take 3 bytes).
- *
- * @param character the character code.
- * @return the size in binary encoded-form, either 1 or 3 bytes.
- */
- static int getCharSize(final int character) {
- // See char encoding in FusionDictionary.java
- if (fitsOnOneByte(character)) return 1;
- if (FormatSpec.INVALID_CHARACTER == character) return 1;
- return 3;
- }
-
- /**
- * Compute the byte size of a character array.
- */
- static int getCharArraySize(final int[] chars) {
- int size = 0;
- for (int character : chars) size += getCharSize(character);
- return size;
- }
-
- /**
- * Writes a char array to a byte buffer.
- *
- * @param codePoints the code point array to write.
- * @param buffer the byte buffer to write to.
- * @param index the index in buffer to write the character array to.
- * @return the index after the last character.
- */
- static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
- for (int codePoint : codePoints) {
- if (1 == getCharSize(codePoint)) {
- buffer[index++] = (byte)codePoint;
- } else {
- buffer[index++] = (byte)(0xFF & (codePoint >> 16));
- buffer[index++] = (byte)(0xFF & (codePoint >> 8));
- buffer[index++] = (byte)(0xFF & codePoint);
- }
- }
- return index;
- }
-
- /**
- * Writes a string with our character format to a byte buffer.
- *
- * This will also write the terminator byte.
- *
- * @param buffer the byte buffer to write to.
- * @param origin the offset to write from.
- * @param word the string to write.
- * @return the size written, in bytes.
- */
- static int writeString(final byte[] buffer, final int origin, final String word) {
- final int length = word.length();
- int index = origin;
- for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
- final int codePoint = word.codePointAt(i);
- if (1 == getCharSize(codePoint)) {
- buffer[index++] = (byte)codePoint;
- } else {
- buffer[index++] = (byte)(0xFF & (codePoint >> 16));
- buffer[index++] = (byte)(0xFF & (codePoint >> 8));
- buffer[index++] = (byte)(0xFF & codePoint);
- }
- }
- buffer[index++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
- return index - origin;
- }
-
- /**
- * Writes a string with our character format to an OutputStream.
- *
- * This will also write the terminator byte.
- *
- * @param stream the OutputStream to write to.
- * @param word the string to write.
- * @return the size written, in bytes.
- */
- static int writeString(final OutputStream stream, final String word) throws IOException {
- final int length = word.length();
- int written = 0;
- for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
- final int codePoint = word.codePointAt(i);
- final int charSize = getCharSize(codePoint);
- if (1 == charSize) {
- stream.write((byte) codePoint);
- } else {
- stream.write((byte) (0xFF & (codePoint >> 16)));
- stream.write((byte) (0xFF & (codePoint >> 8)));
- stream.write((byte) (0xFF & codePoint));
- }
- written += charSize;
- }
- stream.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
- written += FormatSpec.PTNODE_TERMINATOR_SIZE;
- return written;
- }
-
- /**
- * Reads a string from a DictBuffer. This is the converse of the above method.
- */
- static String readString(final DictBuffer dictBuffer) {
- final StringBuilder s = new StringBuilder();
- int character = readChar(dictBuffer);
- while (character != FormatSpec.INVALID_CHARACTER) {
- s.appendCodePoint(character);
- character = readChar(dictBuffer);
- }
- return s.toString();
- }
-
- /**
- * Reads a character from the buffer.
- *
- * This follows the character format documented earlier in this source file.
- *
- * @param dictBuffer the buffer, positioned over an encoded character.
- * @return the character code.
- */
- static int readChar(final DictBuffer dictBuffer) {
- int character = dictBuffer.readUnsignedByte();
- if (!fitsOnOneByte(character)) {
- if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) {
- return FormatSpec.INVALID_CHARACTER;
- }
- character <<= 16;
- character += dictBuffer.readUnsignedShort();
- }
- return character;
- }
- }
-
- /**
- * Reads and returns the PtNode count out of a buffer and forwards the pointer.
- */
- /* package */ static int readPtNodeCount(final DictBuffer dictBuffer) {
- final int msb = dictBuffer.readUnsignedByte();
- if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= msb) {
- return msb;
- } else {
- return ((FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT & msb) << 8)
- + dictBuffer.readUnsignedByte();
- }
- }
-
- /**
- * Finds, as a string, the word at the position passed as an argument.
- *
- * @param dictDecoder the dict decoder.
- * @param headerSize the size of the header.
- * @param pos the position to seek.
- * @return the word with its frequency, as a weighted string.
- */
- @UsedForTesting
- /* package for tests */ static WeightedString getWordAtPosition(final DictDecoder dictDecoder,
- final int headerSize, final int pos) {
- final WeightedString result;
- final int originalPos = dictDecoder.getPosition();
- dictDecoder.setPosition(pos);
- result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos);
- dictDecoder.setPosition(originalPos);
- return result;
- }
-
- private static WeightedString getWordAtPositionWithoutParentAddress(
- final DictDecoder dictDecoder, final int headerSize, final int pos) {
- dictDecoder.setPosition(headerSize);
- final int count = dictDecoder.readPtNodeCount();
- int groupPos = dictDecoder.getPosition();
- final StringBuilder builder = new StringBuilder();
- WeightedString result = null;
-
- PtNodeInfo last = null;
- for (int i = count - 1; i >= 0; --i) {
- PtNodeInfo info = dictDecoder.readPtNode(groupPos);
- groupPos = info.mEndAddress;
- if (info.mOriginalAddress == pos) {
- builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
- result = new WeightedString(builder.toString(), info.mProbabilityInfo);
- break; // and return
- }
- if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
- if (info.mChildrenAddress > pos) {
- if (null == last) continue;
- builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
- dictDecoder.setPosition(last.mChildrenAddress);
- i = dictDecoder.readPtNodeCount();
- groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
- last = null;
- continue;
- }
- last = info;
- }
- if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
- builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
- dictDecoder.setPosition(last.mChildrenAddress);
- i = dictDecoder.readPtNodeCount();
- groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
- last = null;
- continue;
- }
- }
- return result;
- }
-
- /**
- * Helper method to pass a file name instead of a File object to isBinaryDictionary.
- */
- public static boolean isBinaryDictionary(final String filename) {
- final File file = new File(filename);
- return isBinaryDictionary(file);
- }
-
- /**
- * Basic test to find out whether the file is a binary dictionary or not.
- *
- * @param file The file to test.
- * @return true if it's a binary dictionary, false otherwise
- */
- public static boolean isBinaryDictionary(final File file) {
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, 0, file.length());
- if (dictDecoder == null) {
- return false;
- }
- return dictDecoder.hasValidRawBinaryDictionary();
- }
-}