From f6b0e32df38da4e2130bdbfc8875ea2d19054caf Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Tue, 21 Oct 2014 17:31:00 +0900 Subject: Add a *FAST* dictionary header reader. It's still unused as of this change but the next change will use it. As a reference point, generating the metadata for Bayo takes 3'02" on my machine with the info command; it's down to 16" if made to use this instead. The gain increases with the number of dictionaries, obviously. Change-Id: I0eeea2d8f81bb74b0d1570af658e91b56f7c2b79 --- .../latin/makedict/BinaryDictDecoderUtils.java | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tests/src') diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 120b96bc6..be75565bb 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -17,11 +17,16 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; + import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.HashMap; +import java.util.LinkedList; + +import javax.annotation.Nonnull; /** * Decodes binary files for a FusionDictionary. @@ -360,6 +365,43 @@ public final class BinaryDictDecoderUtils { return result; } + /** + * Helper method that brutally decodes a header from a byte array. + * + * @param headerBuffer a buffer containing the bytes of the header. 
+     * @return a hashmap of the attributes stored in the header, mapping each key string to
+     *         the value string that immediately follows it in the buffer
+     * @throws UnsupportedFormatException if the buffer holds an odd number of terminated
+     *         strings, if a multi-byte character is cut off by the end of the buffer, or if
+     *         a multi-byte character does not decode to a valid Unicode code point
+     */
+    @Nonnull
+    public static HashMap<String, String> decodeHeaderAttributes(
+            @Nonnull final byte[] headerBuffer) throws UnsupportedFormatException {
+        final HashMap<String, String> attributes = new HashMap<>();
+        final StringBuilder sb = new StringBuilder();
+        // The buffer is read as a flat sequence of terminated strings that alternate
+        // key, value, key, value... pendingKey is non-null while a key awaits its value.
+        String pendingKey = null;
+        int index = 0;
+        while (index < headerBuffer.length) {
+            final int firstByte = headerBuffer[index] & 0xFF;
+            if (headerBuffer[index] == FormatSpec.PTNODE_CHARACTERS_TERMINATOR) {
+                // End of the current string: it is either a new key or the value of the
+                // pending key.
+                final String decoded = sb.toString();
+                sb.setLength(0);
+                if (null == pendingKey) {
+                    pendingKey = decoded;
+                } else {
+                    attributes.put(pendingKey, decoded);
+                    pendingKey = null;
+                }
+            } else if (CharEncoding.fitsOnOneByte(firstByte, null /* codePointTable */)) {
+                sb.appendCodePoint(firstByte);
+            } else {
+                // Anything else is read as a three-byte, big-endian code point. Check the
+                // two continuation bytes exist so a truncated buffer is reported as a format
+                // error rather than an ArrayIndexOutOfBoundsException.
+                if (index + 2 >= headerBuffer.length) {
+                    throw new UnsupportedFormatException(
+                            "Truncated character at offset " + index);
+                }
+                final int codePoint = (firstByte << 16)
+                        + ((headerBuffer[index + 1] & 0xFF) << 8)
+                        + (headerBuffer[index + 2] & 0xFF);
+                // Three bytes can encode values above U+10FFFF, which
+                // StringBuilder#appendCodePoint rejects with an IllegalArgumentException.
+                if (!Character.isValidCodePoint(codePoint)) {
+                    throw new UnsupportedFormatException(
+                            "Invalid code point 0x" + Integer.toHexString(codePoint)
+                            + " at offset " + index);
+                }
+                sb.appendCodePoint(codePoint);
+                index += 2;
+            }
+            index += 1;
+        }
+        // Bytes after the last terminator never form a complete string and are ignored.
+        if (null != pendingKey) {
+            throw new UnsupportedFormatException("Odd number of attributes");
+        }
+        return attributes;
+    }
+
     /**
      * Helper method to pass a file name instead of a File object to isBinaryDictionary.
      */
-- 
cgit v1.2.3-83-g751a