aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
diff options
context:
space:
mode:
authorAmin Bandali <bandali@kelar.org>2024-12-16 21:45:41 -0500
committerAmin Bandali <bandali@kelar.org>2025-01-11 14:17:35 -0500
commite9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree02dcc096643d74645bf28459c2834c3d4a2ad7f2 /tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
parentfb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
downloadlatinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java')
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java292
1 files changed, 0 insertions, 292 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
deleted file mode 100644
index da1b32a8b..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.define.DecoderSpecificConstants;
-import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.Stack;
-
-public final class BinaryDictIOUtils {
- private static final boolean DBG = false;
-
- private BinaryDictIOUtils() {
- // This utility class is not publicly instantiable.
- }
-
- /**
- * Returns new dictionary decoder.
- *
- * @param dictFile the dictionary file.
- * @param bufferType The type of buffer, as one of USE_* in DictDecoder.
- * @return new dictionary decoder if the dictionary file exists, otherwise null.
- */
- public static DictDecoder getDictDecoder(final File dictFile, final long offset,
- final long length, final int bufferType) {
- return new Ver4DictDecoder(dictFile);
- }
-
- public static DictDecoder getDictDecoder(final File dictFile, final long offset,
- final long length, final DictionaryBufferFactory factory) {
- return new Ver4DictDecoder(dictFile);
- }
-
- public static DictDecoder getDictDecoder(final File dictFile, final long offset,
- final long length) {
- return getDictDecoder(dictFile, offset, length, DictDecoder.USE_READONLY_BYTEBUFFER);
- }
-
- private static final class Position {
- public static final int NOT_READ_PTNODE_COUNT = -1;
-
- public int mAddress;
- public int mNumOfPtNode;
- public int mPosition;
- public int mLength;
-
- public Position(int address, int length) {
- mAddress = address;
- mLength = length;
- mNumOfPtNode = NOT_READ_PTNODE_COUNT;
- }
- }
-
- /**
- * Retrieves all node arrays without recursive call.
- */
- private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
- final int bodyOffset, final Map<Integer, String> words,
- final Map<Integer, Integer> frequencies,
- final Map<Integer, ArrayList<PendingAttribute>> bigrams) {
- int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1];
-
- Stack<Position> stack = new Stack<>();
- int index = 0;
-
- Position initPos = new Position(bodyOffset, 0);
- stack.push(initPos);
-
- while (!stack.empty()) {
- Position p = stack.peek();
-
- if (DBG) {
- MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" +
- p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength);
- }
-
- if (dictDecoder.getPosition() != p.mAddress) dictDecoder.setPosition(p.mAddress);
- if (index != p.mLength) index = p.mLength;
-
- if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) {
- p.mNumOfPtNode = dictDecoder.readPtNodeCount();
- p.mAddress = dictDecoder.getPosition();
- p.mPosition = 0;
- }
- if (p.mNumOfPtNode == 0) {
- stack.pop();
- continue;
- }
- final PtNodeInfo ptNodeInfo = dictDecoder.readPtNode(p.mAddress);
- for (int i = 0; i < ptNodeInfo.mCharacters.length; ++i) {
- pushedChars[index++] = ptNodeInfo.mCharacters[i];
- }
- p.mPosition++;
- if (ptNodeInfo.isTerminal()) {// found word
- words.put(ptNodeInfo.mOriginalAddress, new String(pushedChars, 0, index));
- frequencies.put(
- ptNodeInfo.mOriginalAddress, ptNodeInfo.mProbabilityInfo.mProbability);
- if (ptNodeInfo.mBigrams != null) {
- bigrams.put(ptNodeInfo.mOriginalAddress, ptNodeInfo.mBigrams);
- }
- }
-
- if (p.mPosition == p.mNumOfPtNode) {
- stack.pop();
- } else {
- // The PtNode array has more PtNodes.
- p.mAddress = dictDecoder.getPosition();
- }
-
- if (hasChildrenAddress(ptNodeInfo.mChildrenAddress)) {
- final Position childrenPos = new Position(ptNodeInfo.mChildrenAddress, index);
- stack.push(childrenPos);
- }
- }
- }
-
- /**
- * Reads unigrams and bigrams from the binary file.
- * Doesn't store a full memory representation of the dictionary.
- *
- * @param dictDecoder the dict decoder.
- * @param words the map to store the address as a key and the word as a value.
- * @param frequencies the map to store the address as a key and the frequency as a value.
- * @param bigrams the map to store the address as a key and the list of address as a value.
- * @throws IOException if the file can't be read.
- * @throws UnsupportedFormatException if the format of the file is not recognized.
- */
- /* package */ static void readUnigramsAndBigramsBinary(final DictDecoder dictDecoder,
- final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
- final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
- UnsupportedFormatException {
- // Read header
- final DictionaryHeader header = dictDecoder.readHeader();
- readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words,
- frequencies, bigrams);
- }
-
- /**
- * Gets the address of the last PtNode of the exact matching word in the dictionary.
- * If no match is found, returns NOT_VALID_WORD.
- *
- * @param dictDecoder the dict decoder.
- * @param word the word we search for.
- * @return the address of the terminal node.
- * @throws IOException if the file can't be read.
- * @throws UnsupportedFormatException if the format of the file is not recognized.
- */
- @UsedForTesting
- /* package */ static int getTerminalPosition(final DictDecoder dictDecoder,
- final String word) throws IOException, UnsupportedFormatException {
- if (word == null) return FormatSpec.NOT_VALID_WORD;
- dictDecoder.setPosition(0);
- dictDecoder.readHeader();
- int wordPos = 0;
- final int wordLen = word.codePointCount(0, word.length());
- for (int depth = 0; depth < DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
- if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
-
- do {
- final int ptNodeCount = dictDecoder.readPtNodeCount();
- boolean foundNextPtNode = false;
- for (int i = 0; i < ptNodeCount; ++i) {
- final int ptNodePos = dictDecoder.getPosition();
- final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos);
- boolean same = true;
- for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
- p < currentInfo.mCharacters.length;
- ++p, j = word.offsetByCodePoints(j, 1)) {
- if (wordPos + p >= wordLen
- || word.codePointAt(j) != currentInfo.mCharacters[p]) {
- same = false;
- break;
- }
- }
-
- if (same) {
- // found the PtNode matches the word.
- if (wordPos + currentInfo.mCharacters.length == wordLen) {
- return currentInfo.isTerminal() ? ptNodePos : FormatSpec.NOT_VALID_WORD;
- }
- wordPos += currentInfo.mCharacters.length;
- if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
- return FormatSpec.NOT_VALID_WORD;
- }
- foundNextPtNode = true;
- dictDecoder.setPosition(currentInfo.mChildrenAddress);
- break;
- }
- }
- if (foundNextPtNode) break;
- return FormatSpec.NOT_VALID_WORD;
- } while(true);
- }
- return FormatSpec.NOT_VALID_WORD;
- }
-
- /**
- * Writes a PtNodeCount to the stream.
- *
- * @param destination the stream to write.
- * @param ptNodeCount the count.
- * @return the size written in bytes.
- */
- @UsedForTesting
- static int writePtNodeCount(final OutputStream destination, final int ptNodeCount)
- throws IOException {
- final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
- // the count must fit on one byte or two bytes.
- // Please see comments in FormatSpec.
- if (countSize != 1 && countSize != 2) {
- throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
- }
- final int encodedPtNodeCount = (countSize == 2) ?
- (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
- BinaryDictEncoderUtils.writeUIntToStream(destination, encodedPtNodeCount, countSize);
- return countSize;
- }
-
- /**
- * Helper method to hide the actual value of the no children address.
- */
- public static boolean hasChildrenAddress(final int address) {
- return FormatSpec.NO_CHILDREN_ADDRESS != address;
- }
-
- /**
- * Compute the binary size of the node count
- * @param count the node count
- * @return the size of the node count, either 1 or 2 bytes.
- */
- public static int getPtNodeCountSize(final int count) {
- if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) {
- return 1;
- } else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) {
- return 2;
- } else {
- throw new RuntimeException("Can't have more than "
- + FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found "
- + count + ")");
- }
- }
-
- static int getChildrenAddressSize(final int optionFlags) {
- switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
- return 1;
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
- return 2;
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
- return 3;
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
- default:
- return 0;
- }
- }
-
- /**
- * Calculate bigram frequency from compressed value
- *
- * @param unigramFrequency
- * @param bigramFrequency compressed frequency
- * @return approximate bigram frequency
- */
- @UsedForTesting
- public static int reconstructBigramFrequency(final int unigramFrequency,
- final int bigramFrequency) {
- final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
- / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
- final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
- return (int)resultFreqFloat;
- }
-}