diff options
Diffstat (limited to 'java/src')
6 files changed, 308 insertions, 131 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 216492b4d..8109321b6 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -225,20 +225,26 @@ public final class BinaryDictDecoderUtils { * * @param buffer the OutputStream to write to. * @param word the string to write. + * @return the size written, in bytes. */ - static void writeString(final OutputStream buffer, final String word) throws IOException { + static int writeString(final OutputStream buffer, final String word) throws IOException { final int length = word.length(); + int written = 0; for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { + final int charSize = getCharSize(codePoint); + if (1 == charSize) { buffer.write((byte) codePoint); } else { buffer.write((byte) (0xFF & (codePoint >> 16))); buffer.write((byte) (0xFF & (codePoint >> 8))); buffer.write((byte) (0xFF & codePoint)); } + written += charSize; } buffer.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR); + written += FormatSpec.PTNODE_TERMINATOR_SIZE; + return written; } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 0f7d2f6c9..9a28629b1 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -301,35 +301,6 @@ public final class BinaryDictIOUtils { } /** - * Write a string to a stream. - * - * @param destination the stream to write. - * @param word the string to be written. - * @return the size written, in bytes. - * @throws IOException - */ - private static int writeString(final OutputStream destination, final String word) - throws IOException { - int size = 0; - final int length = word.length(); - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (CharEncoding.getCharSize(codePoint) == 1) { - destination.write((byte)codePoint); - size++; - } else { - destination.write((byte)(0xFF & (codePoint >> 16))); - destination.write((byte)(0xFF & (codePoint >> 8))); - destination.write((byte)(0xFF & codePoint)); - size += 3; - } - } - destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR); - size += FormatSpec.PTNODE_TERMINATOR_SIZE; - return size; - } - - /** * Write a PtNode to an output stream from a PtNodeInfo. * A PtNode is an in-memory representation of a node in the patricia trie. * A PtNode info is a container for low-level information about how the @@ -387,7 +358,7 @@ public final class BinaryDictIOUtils { destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency)); size++; - size += writeString(destination, target.mWord); + size += CharEncoding.writeString(destination, target.mWord); } } diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java new file mode 100644 index 000000000..00f401ea7 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * An auxiliary class for reading SparseTable and data written by SparseTableContentWriter. + */ +public class SparseTableContentReader { + + /** + * An interface of a function which is passed to SparseTableContentReader.read. + */ + public interface SparseTableContentReaderInterface { + /** + * Reads data. + * + * @param buffer the DictBuffer. The position of the buffer is set to the head of data. + */ + public void read(final DictBuffer buffer); + } + + private final int mContentCount; + private final int mBlockSize; + protected final File mBaseDir; + private final File mLookupTableFile; + private final File[] mAddressTableFiles; + private final File[] mContentFiles; + private DictBuffer mLookupTableBuffer; + private final DictBuffer[] mAddressTableBuffers; + private final DictBuffer[] mContentBuffers; + private final DictionaryBufferFactory mFactory; + + /** + * Sole constructor of SparseTableContentReader. + * + * @param name the name of SparseTable. + * @param blockSize the block size of the content table. + * @param baseDir the directory which contains the files of the content table. + * @param contentFilenames the file names of content files. + * @param contentIds the ids of contents. These ids are used for a suffix of a name of + * address files and content files. + * @param factory the DictionaryBufferFactory which is used for opening the files. + */ + public SparseTableContentReader(final String name, final int blockSize, final File baseDir, + final String[] contentFilenames, final String[] contentIds, + final DictionaryBufferFactory factory) { + if (contentFilenames.length != contentIds.length) { + throw new RuntimeException("The length of contentFilenames and the length of" + + " contentIds are different " + contentFilenames.length + ", " + + contentIds.length); + } + mBlockSize = blockSize; + mBaseDir = baseDir; + mFactory = factory; + mContentCount = contentFilenames.length; + mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); + mAddressTableFiles = new File[mContentCount]; + mContentFiles = new File[mContentCount]; + for (int i = 0; i < mContentCount; ++i) { + mAddressTableFiles[i] = new File(mBaseDir, + name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); + mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); + } + mAddressTableBuffers = new DictBuffer[mContentCount]; + mContentBuffers = new DictBuffer[mContentCount]; + } + + public void openBuffers() throws FileNotFoundException, IOException { + mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile); + for (int i = 0; i < mContentCount; ++i) { + mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]); + mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]); + } + } + + protected void read(final int contentIndex, final int index, + final SparseTableContentReaderInterface reader) { + if (index < 0 || (index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES + >= mLookupTableBuffer.limit()) { + return; + } + + mLookupTableBuffer.position((index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); + final int posInAddressTable = mLookupTableBuffer.readInt(); + if (posInAddressTable == SparseTable.NOT_EXIST) { + return; + } + + mAddressTableBuffers[contentIndex].position( + (posInAddressTable + index % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); + final int address = mAddressTableBuffers[contentIndex].readInt(); + if (address == SparseTable.NOT_EXIST) { + return; + } + + mContentBuffers[contentIndex].position(address); + reader.read(mContentBuffers[contentIndex]); + } +}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java new file mode 100644 index 000000000..49f0fd624 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * An auxiliary class for writing data associated with SparseTable to files. + */ +public class SparseTableContentWriter { + public interface SparseTableContentWriterInterface { + public void write(final OutputStream outStream) throws IOException; + } + + private final int mContentCount; + private final SparseTable mSparseTable; + private final File mLookupTableFile; + protected final File mBaseDir; + private final File[] mAddressTableFiles; + private final File[] mContentFiles; + protected final OutputStream[] mContentOutStreams; + + /** + * Sole constructor of SparseTableContentWriter. + * + * @param name the name of SparseTable. + * @param initialCapacity the initial capacity of SparseTable. + * @param blockSize the block size of the content table. + * @param baseDir the directory which contains the files of the content table. + * @param contentFilenames the file names of content files. + * @param contentIds the ids of contents. These ids are used for a suffix of a name of address + * files and content files. + */ + public SparseTableContentWriter(final String name, final int initialCapacity, + final int blockSize, final File baseDir, final String[] contentFilenames, + final String[] contentIds) { + if (contentFilenames.length != contentIds.length) { + throw new RuntimeException("The length of contentFilenames and the length of" + + " contentIds are different " + contentFilenames.length + ", " + + contentIds.length); + } + mContentCount = contentFilenames.length; + mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); + mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); + mAddressTableFiles = new File[mContentCount]; + mContentFiles = new File[mContentCount]; + mBaseDir = baseDir; + for (int i = 0; i < mContentCount; ++i) { + mAddressTableFiles[i] = new File(mBaseDir, + name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); + mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); + } + mContentOutStreams = new OutputStream[mContentCount]; + } + + public void openStreams() throws FileNotFoundException { + for (int i = 0; i < mContentCount; ++i) { + mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]); + } + } + + protected void write(final int contentIndex, final int index, + final SparseTableContentWriterInterface writer) throws IOException { + mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length()); + writer.write(mContentOutStreams[contentIndex]); + mContentOutStreams[contentIndex].flush(); + } + + public void closeStreams() throws IOException { + mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles); + for (int i = 0; i < mContentCount; ++i) { + mContentOutStreams[i].close(); + } + } +}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 734223ec2..64538c102 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -51,9 +51,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder { protected DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; - private DictBuffer mBigramBuffer; private DictBuffer mShortcutBuffer; - private SparseTable mBigramAddressTable; + private BigramContentReader mBigramReader; private SparseTable mShortcutAddressTable; @UsedForTesting @@ -108,8 +107,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder { mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); - mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ)); - loadBigramAddressSparseTable(); + mBigramReader = new BigramContentReader(mDictDirectory.getName(), + mDictDirectory, mBufferFactory, false); + mBigramReader.openBuffers(); mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT)); loadShortcutAddressSparseTable(); } @@ -136,16 +136,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder { return header; } - private void loadBigramAddressSparseTable() throws IOException { - final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - final File freqsFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX - + FormatSpec.BIGRAM_FREQ_CONTENT_ID); - mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile }, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); - } - // TODO: Let's have something like SparseTableContentsReader in this class. private void loadShortcutAddressSparseTable() throws IOException { final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() @@ -161,6 +151,77 @@ public class Ver4DictDecoder extends AbstractDictDecoder { FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } + /** + * An auxiliary class for reading bigrams. + */ + protected static class BigramContentReader extends SparseTableContentReader { + private final boolean mHasTimestamp; + + public BigramContentReader(final String name, final File baseDir, + final DictionaryBufferFactory factory, final boolean hasTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory); + mHasTimestamp = hasTimestamp; + } + + // TODO: Consolidate this method and BigramContentWriter.getContentFilenames. + private static String[] getContentFilenames(final String name, final boolean hasTimestamp) { + final String[] contentFilenames; + if (hasTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } + return contentFilenames; + } + + // TODO: Consolidate this method and BigramContentWriter.getContentIds. + private static String[] getContentIds(final boolean hasTimestamp) { + final String[] contentIds; + if (hasTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; + } + return contentIds; + } + + public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId, + final DictBuffer terminalAddressTableBuffer) { + final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); + read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, + new SparseTableContentReaderInterface() { + @Override + public void read(final DictBuffer buffer) { + while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, + // remaining bigram entries are ignored. + final int bigramFlags = buffer.readUnsignedByte(); + final int targetTerminalId = buffer.readUnsignedInt24(); + terminalAddressTableBuffer.position( + targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24(); + bigrams.add(new PendingAttribute( + bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + targetAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { + break; + } + } + if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() + + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); + } + } + }); + if (bigrams.isEmpty()) return null; + return bigrams; + } + } + protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); @@ -240,32 +301,10 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); + final ArrayList<PendingAttribute> bigrams = + mBigramReader.readTargetsAndFrequencies(terminalId, + mTerminalAddressTableBuffer); - final ArrayList<PendingAttribute> bigrams; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList<PendingAttribute>(); - final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId); - mBigramBuffer.position(posOfBigrams); - while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, - // remaining bigram entries are ignored. - final int bigramFlags = mBigramBuffer.readUnsignedByte(); - final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); - mTerminalAddressTableBuffer.position( - targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - targetAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() - + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); - } - } else { - bigrams = null; - } return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, parentAddress, childrenAddress, shortcutTargets, bigrams); } @@ -318,10 +357,14 @@ public class Ver4DictDecoder extends AbstractDictDecoder { @Override public boolean readAndFollowForwardLink() { - final int nextAddress = mDictBuffer.readUnsignedInt24(); - if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) { - mDictBuffer.position(nextAddress); - return true; + final int forwardLinkPos = mDictBuffer.position(); + int nextRelativePos = BinaryDictDecoderUtils.readSInt24(mDictBuffer); + if (nextRelativePos != FormatSpec.NO_FORWARD_LINK_ADDRESS) { + final int nextPos = forwardLinkPos + nextRelativePos; + if (nextPos >= 0 && nextPos < mDictBuffer.limit()) { + mDictBuffer.position(nextPos); + return true; + } } return false; } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 5d5ab0462..842427434 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -57,62 +57,6 @@ public class Ver4DictEncoder implements DictEncoder { mDictPlacedDir = dictPlacedDir; } - private interface SparseTableContentWriterInterface { - public void write(final OutputStream outStream) throws IOException; - } - - private static class SparseTableContentWriter { - private final int mContentCount; - private final SparseTable mSparseTable; - private final File mLookupTableFile; - protected final File mBaseDir; - private final File[] mAddressTableFiles; - private final File[] mContentFiles; - protected final OutputStream[] mContentOutStreams; - - public SparseTableContentWriter(final String name, final int initialCapacity, - final int blockSize, final File baseDir, final String[] contentFilenames, - final String[] contentIds) { - if (contentFilenames.length != contentIds.length) { - throw new RuntimeException("The length of contentFilenames and the length of" - + " contentIds are different " + contentFilenames.length + ", " - + contentIds.length); - } - mContentCount = contentFilenames.length; - mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); - mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - mAddressTableFiles = new File[mContentCount]; - mContentFiles = new File[mContentCount]; - mBaseDir = baseDir; - for (int i = 0; i < mContentCount; ++i) { - mAddressTableFiles[i] = new File(mBaseDir, - name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); - mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); - } - mContentOutStreams = new OutputStream[mContentCount]; - } - - public void openStreams() throws FileNotFoundException { - for (int i = 0; i < mContentCount; ++i) { - mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]); - } - } - - protected void write(final int contentIndex, final int index, - final SparseTableContentWriterInterface writer) throws IOException { - mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length()); - writer.write(mContentOutStreams[contentIndex]); - mContentOutStreams[contentIndex].flush(); - } - - public void closeStreams() throws IOException { - mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles); - for (int i = 0; i < mContentCount; ++i) { - mContentOutStreams[i].close(); - } - } - } - private static class BigramContentWriter extends SparseTableContentWriter { private final boolean mWriteTimestamp; |