Rename to Kelar Keyboard (org.kelar.inputmethod.latin)

author: Amin Bandali <bandali@kelar.org> 2024-12-16 21:45:41 -0500
committer: Amin Bandali <bandali@kelar.org> 2025-01-11 14:17:35 -0500
commit: e9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree: 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
parent: fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
download: latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip
1 files changed, 0 insertions, 426 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
deleted file mode 100644
index be75565bb..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
-import java.util.LinkedList;
-
-import javax.annotation.Nonnull;
-
-/**
- * Decodes binary files for a FusionDictionary.
- *
- * All the methods in this class are static.
- *
- * TODO: Move this file to makedict/internal.
- * TODO: Rename this class to DictDecoderUtils.
- */
-public final class BinaryDictDecoderUtils {
-    private BinaryDictDecoderUtils() {
-        // This utility class is not publicly instantiable.
-    }
-
-    @UsedForTesting
-    public interface DictBuffer {
-        public int readUnsignedByte();
-        public int readUnsignedShort();
-        public int readUnsignedInt24();
-        public int readInt();
-        public int position();
-        public void position(int newPosition);
-        @UsedForTesting
-        public void put(final byte b);
-        public int limit();
-        @UsedForTesting
-        public int capacity();
-    }
-
-    public static final class ByteBufferDictBuffer implements DictBuffer {
-        private ByteBuffer mBuffer;
-
-        public ByteBufferDictBuffer(final ByteBuffer buffer) {
-            mBuffer = buffer;
-        }
-
-        @Override
-        public int readUnsignedByte() {
-            return mBuffer.get() & 0xFF;
-        }
-
-        @Override
-        public int readUnsignedShort() {
-            return mBuffer.getShort() & 0xFFFF;
-        }
-
-        @Override
-        public int readUnsignedInt24() {
-            final int retval = readUnsignedByte();
-            return (retval << 16) + readUnsignedShort();
-        }
-
-        @Override
-        public int readInt() {
-            return mBuffer.getInt();
-        }
-
-        @Override
-        public int position() {
-            return mBuffer.position();
-        }
-
-        @Override
-        public void position(int newPos) {
-            mBuffer.position(newPos);
-        }
-
-        @Override
-        public void put(final byte b) {
-            mBuffer.put(b);
-        }
-
-        @Override
-        public int limit() {
-            return mBuffer.limit();
-        }
-
-        @Override
-        public int capacity() {
-            return mBuffer.capacity();
-        }
-    }
-
-    /**
-     * A class grouping utility function for our specific character encoding.
-     */
-    static final class CharEncoding {
-
-        /**
-         * Helper method to find out whether this code fits on one byte
-         */
-        private static boolean fitsOnOneByte(final int character,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
-            int codePoint = character;
-            if (codePointToOneByteCodeMap != null) {
-                if (codePointToOneByteCodeMap.containsKey(character)) {
-                    codePoint = codePointToOneByteCodeMap.get(character);
-                }
-            }
-            return codePoint >= FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE
-                    && codePoint <= FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
-        }
-
-        /**
-         * Compute the size of a character given its character code.
-         *
-         * Char format is:
-         * 1 byte = bbbbbbbb match
-         * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
-         * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
-         *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
-         *       00011111 would be outside unicode.
-         * else: iso-latin-1 code
-         * This allows for the whole unicode range to be encoded, including chars outside of
-         * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
-         * characters which should never happen anyway (and still work, but take 3 bytes).
-         *
-         * @param character the character code.
-         * @return the size in binary encoded-form, either 1 or 3 bytes.
-         */
-        static int getCharSize(final int character,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
-            // See char encoding in FusionDictionary.java
-            if (fitsOnOneByte(character, codePointToOneByteCodeMap)) return 1;
-            if (FormatSpec.INVALID_CHARACTER == character) return 1;
-            return 3;
-        }
-
-        /**
-         * Compute the byte size of a character array.
-         */
-        static int getCharArraySize(final int[] chars,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
-            int size = 0;
-            for (int character : chars) size += getCharSize(character, codePointToOneByteCodeMap);
-            return size;
-        }
-
-        /**
-         * Writes a char array to a byte buffer.
-         *
-         * @param codePoints the code point array to write.
-         * @param buffer the byte buffer to write to.
-         * @param fromIndex the index in buffer to write the character array to.
-         * @param codePointToOneByteCodeMap the map to convert the code point.
-         * @return the index after the last character.
-         */
-        static int writeCharArray(final int[] codePoints, final byte[] buffer, final int fromIndex,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
-            int index = fromIndex;
-            for (int codePoint : codePoints) {
-                if (codePointToOneByteCodeMap != null) {
-                    if (codePointToOneByteCodeMap.containsKey(codePoint)) {
-                        // Convert code points
-                        codePoint = codePointToOneByteCodeMap.get(codePoint);
-                    }
-                }
-                if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) {
-                    buffer[index++] = (byte)codePoint;
-                } else {
-                    buffer[index++] = (byte)(0xFF & (codePoint >> 16));
-                    buffer[index++] = (byte)(0xFF & (codePoint >> 8));
-                    buffer[index++] = (byte)(0xFF & codePoint);
-                }
-            }
-            return index;
-        }
-
-        /**
-         * Writes a string with our character format to a byte buffer.
-         *
-         * This will also write the terminator byte.
-         *
-         * @param buffer the byte buffer to write to.
-         * @param origin the offset to write from.
-         * @param word the string to write.
-         * @return the size written, in bytes.
-         */
-        static int writeString(final byte[] buffer, final int origin, final String word,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
-            final int length = word.length();
-            int index = origin;
-            for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
-                int codePoint = word.codePointAt(i);
-                if (codePointToOneByteCodeMap != null) {
-                    if (codePointToOneByteCodeMap.containsKey(codePoint)) {
-                        // Convert code points
-                        codePoint = codePointToOneByteCodeMap.get(codePoint);
-                    }
-                }
-                if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) {
-                    buffer[index++] = (byte)codePoint;
-                } else {
-                    buffer[index++] = (byte)(0xFF & (codePoint >> 16));
-                    buffer[index++] = (byte)(0xFF & (codePoint >> 8));
-                    buffer[index++] = (byte)(0xFF & codePoint);
-                }
-            }
-            buffer[index++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
-            return index - origin;
-        }
-
-        /**
-         * Writes a string with our character format to an OutputStream.
-         *
-         * This will also write the terminator byte.
-         *
-         * @param stream the OutputStream to write to.
-         * @param word the string to write.
-         * @return the size written, in bytes.
-         */
-        static int writeString(final OutputStream stream, final String word,
-                final HashMap<Integer, Integer> codePointToOneByteCodeMap) throws IOException {
-            final int length = word.length();
-            int written = 0;
-            for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
-                final int codePoint = word.codePointAt(i);
-                final int charSize = getCharSize(codePoint, codePointToOneByteCodeMap);
-                if (1 == charSize) {
-                    stream.write((byte) codePoint);
-                } else {
-                    stream.write((byte) (0xFF & (codePoint >> 16)));
-                    stream.write((byte) (0xFF & (codePoint >> 8)));
-                    stream.write((byte) (0xFF & codePoint));
-                }
-                written += charSize;
-            }
-            stream.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
-            written += FormatSpec.PTNODE_TERMINATOR_SIZE;
-            return written;
-        }
-
-        /**
-         * Reads a string from a DictBuffer. This is the converse of the above method.
-         */
-        static String readString(final DictBuffer dictBuffer) {
-            final StringBuilder s = new StringBuilder();
-            int character = readChar(dictBuffer);
-            while (character != FormatSpec.INVALID_CHARACTER) {
-                s.appendCodePoint(character);
-                character = readChar(dictBuffer);
-            }
-            return s.toString();
-        }
-
-        /**
-         * Reads a character from the buffer.
-         *
-         * This follows the character format documented earlier in this source file.
-         *
-         * @param dictBuffer the buffer, positioned over an encoded character.
-         * @return the character code.
-         */
-        static int readChar(final DictBuffer dictBuffer) {
-            int character = dictBuffer.readUnsignedByte();
-            if (!fitsOnOneByte(character, null)) {
-                if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) {
-                    return FormatSpec.INVALID_CHARACTER;
-                }
-                character <<= 16;
-                character += dictBuffer.readUnsignedShort();
-            }
-            return character;
-        }
-    }
-
-    /**
-     * Reads and returns the PtNode count out of a buffer and forwards the pointer.
-     */
-    /* package */ static int readPtNodeCount(final DictBuffer dictBuffer) {
-        final int msb = dictBuffer.readUnsignedByte();
-        if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= msb) {
-            return msb;
-        }
-        return ((FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT & msb) << 8)
-                + dictBuffer.readUnsignedByte();
-    }
-
-    /**
-     * Finds, as a string, the word at the position passed as an argument.
-     *
-     * @param dictDecoder the dict decoder.
-     * @param headerSize the size of the header.
-     * @param pos the position to seek.
-     * @return the word with its frequency, as a weighted string.
-     */
-    @UsedForTesting
-    /* package for tests */ static WeightedString getWordAtPosition(final DictDecoder dictDecoder,
-            final int headerSize, final int pos) {
-        final WeightedString result;
-        final int originalPos = dictDecoder.getPosition();
-        dictDecoder.setPosition(pos);
-        result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos);
-        dictDecoder.setPosition(originalPos);
-        return result;
-    }
-
-    private static WeightedString getWordAtPositionWithoutParentAddress(
-            final DictDecoder dictDecoder, final int headerSize, final int pos) {
-        dictDecoder.setPosition(headerSize);
-        final int count = dictDecoder.readPtNodeCount();
-        int groupPos = dictDecoder.getPosition();
-        final StringBuilder builder = new StringBuilder();
-        WeightedString result = null;
-
-        PtNodeInfo last = null;
-        for (int i = count - 1; i >= 0; --i) {
-            PtNodeInfo info = dictDecoder.readPtNode(groupPos);
-            groupPos = info.mEndAddress;
-            if (info.mOriginalAddress == pos) {
-                builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
-                result = new WeightedString(builder.toString(), info.mProbabilityInfo);
-                break; // and return
-            }
-            if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
-                if (info.mChildrenAddress > pos) {
-                    if (null == last) continue;
-                    builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
-                    dictDecoder.setPosition(last.mChildrenAddress);
-                    i = dictDecoder.readPtNodeCount();
-                    groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
-                    last = null;
-                    continue;
-                }
-                last = info;
-            }
-            if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
-                builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
-                dictDecoder.setPosition(last.mChildrenAddress);
-                i = dictDecoder.readPtNodeCount();
-                groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
-                last = null;
-                continue;
-            }
-        }
-        return result;
-    }
-
-    /**
-     * Helper method that brutally decodes a header from a byte array.
-     *
-     * @param headerBuffer a buffer containing the bytes of the header.
-     * @return a hashmap of the attributes stored in the header
-     */
-    @Nonnull
-    public static HashMap<String, String> decodeHeaderAttributes(@Nonnull final byte[] headerBuffer)
-            throws UnsupportedFormatException {
-        final StringBuilder sb = new StringBuilder();
-        final LinkedList<String> keyValues = new LinkedList<>();
-        int index = 0;
-        while (index < headerBuffer.length) {
-            if (headerBuffer[index] == FormatSpec.PTNODE_CHARACTERS_TERMINATOR) {
-                keyValues.add(sb.toString());
-                sb.setLength(0);
-            } else if (CharEncoding.fitsOnOneByte(headerBuffer[index] & 0xFF,
-                    null /* codePointTable */)) {
-                sb.appendCodePoint(headerBuffer[index] & 0xFF);
-            } else {
-                sb.appendCodePoint(((headerBuffer[index] & 0xFF) << 16)
-                        + ((headerBuffer[index + 1] & 0xFF) << 8)
-                        + (headerBuffer[index + 2] & 0xFF));
-                index += 2;
-            }
-            index += 1;
-        }
-        if ((keyValues.size() & 1) != 0) {
-            throw new UnsupportedFormatException("Odd number of attributes");
-        }
-        final HashMap<String, String> attributes = new HashMap<>();
-        for (int i = 0; i < keyValues.size(); i += 2) {
-            attributes.put(keyValues.get(i), keyValues.get(i + 1));
-        }
-        return attributes;
-    }
-
-    /**
-     * Helper method to pass a file name instead of a File object to isBinaryDictionary.
-     */
-    public static boolean isBinaryDictionary(final String filename) {
-        final File file = new File(filename);
-        return isBinaryDictionary(file);
-    }
-
-    /**
-     * Basic test to find out whether the file is a binary dictionary or not.
-     *
-     * @param file The file to test.
-     * @return true if it's a binary dictionary, false otherwise
-     */
-    public static boolean isBinaryDictionary(final File file) {
-        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length());
-        if (dictDecoder == null) {
-            return false;
-        }
-        return dictDecoder.hasValidRawBinaryDictionary();
-    }
-}
author	Amin Bandali <bandali@kelar.org>	2024-12-16 21:45:41 -0500
committer	Amin Bandali <bandali@kelar.org>	2025-01-11 14:17:35 -0500
commit	e9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree	02dcc096643d74645bf28459c2834c3d4a2ad7f2 /tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
parent	fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
download	latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip