diff options
Diffstat (limited to 'java/src')
8 files changed, 10 insertions, 1127 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index acabea10d..2dbb5eb93 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -58,6 +58,7 @@ public final class BinaryDictDecoderUtils { public int readInt(); public int position(); public void position(int newPosition); + @UsedForTesting public void put(final byte b); public int limit(); @UsedForTesting @@ -166,6 +167,7 @@ public final class BinaryDictDecoderUtils { return size; } + @UsedForTesting static int getCharArraySize(final int[] chars, final int start, final int end) { int size = 0; for (int i = start; i < end; ++i) { @@ -262,6 +264,7 @@ public final class BinaryDictDecoderUtils { */ // TODO: Merge this method with writeCharArray and rename the various write* methods to // make the difference clear. + @UsedForTesting static int writeCodePoints(final OutputStream stream, final int[] codePoints, final int startIndex, final int endIndex) throws IOException { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 8ba0797de..bb40e0dd5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -16,6 +16,7 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; @@ -245,6 +246,7 @@ public class BinaryDictEncoderUtils { } } + @UsedForTesting static void writeUIntToDictBuffer(final DictBuffer dictBuffer, final int value, final int size) { switch(size) { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index efe1b7b34..07ba777c7 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -23,7 +23,6 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; @@ -32,7 +31,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; -import java.util.Iterator; import java.util.Map; import java.util.Stack; @@ -245,6 +243,7 @@ public final class BinaryDictIOUtils { /** * @return the size written, in bytes. Always 3 bytes. */ + @UsedForTesting static int writeSInt24ToBuffer(final DictBuffer dictBuffer, final int value) { final int absValue = Math.abs(value); dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); @@ -256,6 +255,7 @@ public final class BinaryDictIOUtils { /** * @return the size written, in bytes. Always 3 bytes. */ + @UsedForTesting static int writeSInt24ToStream(final OutputStream destination, final int value) throws IOException { final int absValue = Math.abs(value); @@ -265,28 +265,7 @@ public final class BinaryDictIOUtils { return 3; } - /** - * @return the size written, in bytes. 1, 2, or 3 bytes. - */ - private static int writeVariableAddress(final OutputStream destination, final int value) - throws IOException { - switch (BinaryDictEncoderUtils.getByteSize(value)) { - case 1: - destination.write((byte)value); - break; - case 2: - destination.write((byte)(0xFF & (value >> 8))); - destination.write((byte)(0xFF & value)); - break; - case 3: - destination.write((byte)(0xFF & (value >> 16))); - destination.write((byte)(0xFF & (value >> 8))); - destination.write((byte)(0xFF & value)); - break; - } - return BinaryDictEncoderUtils.getByteSize(value); - } - + @UsedForTesting static void skipString(final DictBuffer dictBuffer, final boolean hasMultipleChars) { if (hasMultipleChars) { @@ -300,127 +279,13 @@ public final class BinaryDictIOUtils { } /** - * Write a PtNode to an output stream from a PtNodeInfo. - * A PtNode is an in-memory representation of a node in the patricia trie. - * A PtNode info is a container for low-level information about how the - * PtNode is stored in the binary format. - * - * @param destination the stream to write. - * @param info the PtNode info to be written. - * @return the size written, in bytes. - */ - private static int writePtNode(final OutputStream destination, final PtNodeInfo info) - throws IOException { - int size = FormatSpec.PTNODE_FLAGS_SIZE; - destination.write((byte)info.mFlags); - final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ? - FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress; - size += writeSInt24ToStream(destination, parentOffset); - - for (int i = 0; i < info.mCharacters.length; ++i) { - if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) { - destination.write((byte)info.mCharacters[i]); - size++; - } else { - size += writeSInt24ToStream(destination, info.mCharacters[i]); - } - } - if (info.mCharacters.length > 1) { - destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR); - size++; - } - - if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { - destination.write((byte)info.mFrequency); - size++; - } - - if (DBG) { - MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size - + ", child=" + info.mChildrenAddress + ", characters =" - + new String(info.mCharacters, 0, info.mCharacters.length)); - } - final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ? - 0 : info.mChildrenAddress - (info.mOriginalAddress + size); - writeSInt24ToStream(destination, childrenOffset); - size += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - - if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) { - final int shortcutListSize = - BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); - destination.write((byte)(shortcutListSize >> 8)); - destination.write((byte)(shortcutListSize & 0xFF)); - size += 2; - final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator(); - while (shortcutIterator.hasNext()) { - final WeightedString target = shortcutIterator.next(); - destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags( - shortcutIterator.hasNext(), target.mFrequency)); - size++; - size += CharEncoding.writeString(destination, target.mWord); - } - } - - if (info.mBigrams != null) { - // TODO: Consolidate this code with the code that computes the size of the bigram list - // in BinaryDictEncoderUtils#computeActualNodeArraySize - for (int i = 0; i < info.mBigrams.size(); ++i) { - - final int bigramFrequency = info.mBigrams.get(i).mFrequency; - int bigramFlags = (i < info.mBigrams.size() - 1) - ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0; - size++; - final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress - + size); - bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0; - switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) { - case 1: - bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE; - break; - case 2: - bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES; - break; - case 3: - bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES; - break; - } - bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY; - destination.write((byte)bigramFlags); - size += writeVariableAddress(destination, Math.abs(bigramOffset)); - } - } - return size; - } - - /** - * Compute the size of the PtNode. - */ - static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) { - int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters) - + getChildrenAddressSize(info.mFlags, formatOptions); - if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { - size += FormatSpec.PTNODE_FREQUENCY_SIZE; - } - if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) { - size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); - } - if (info.mBigrams != null) { - for (final PendingAttribute attr : info.mBigrams) { - size += FormatSpec.PTNODE_FLAGS_SIZE; - size += BinaryDictEncoderUtils.getByteSize(attr.mAddress); - } - } - return size; - } - - /** * Writes a PtNodeCount to the stream. * * @param destination the stream to write. * @param ptNodeCount the count. * @return the size written in bytes. */ + @UsedForTesting static int writePtNodeCount(final OutputStream destination, final int ptNodeCount) throws IOException { final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); @@ -435,22 +300,6 @@ public final class BinaryDictIOUtils { return countSize; } - /** - * Write a node array to the stream. - * - * @param destination the stream to write. - * @param infos an array of PtNodeInfo to be written. - * @return the size written, in bytes. - * @throws IOException - */ - static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos) - throws IOException { - int size = writePtNodeCount(destination, infos.length); - for (final PtNodeInfo info : infos) size += writePtNode(destination, info); - writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS); - return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - } - private static final int HEADER_READING_BUFFER_SIZE = 16384; /** * Convenience method to read the header of a binary file. diff --git a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java deleted file mode 100644 index c4f7ec91f..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; - -import java.io.IOException; -import java.util.ArrayList; - -/** - * An interface of a binary dictionary updater. - */ -@UsedForTesting -public interface DictUpdater extends DictDecoder { - - /** - * Deletes the word from the binary dictionary. - * - * @param word the word to be deleted. - */ - @UsedForTesting - public void deleteWord(final String word) throws IOException, UnsupportedFormatException; - - /** - * Inserts a word into a binary dictionary. - * - * @param word the word to be inserted. - * @param frequency the frequency of the new word. - * @param bigramStrings bigram list, or null if none. - * @param shortcuts shortcut list, or null if none. - * @param isBlackListEntry whether this should be a blacklist entry. - */ - // TODO: Support batch insertion. - @UsedForTesting - public void insertWord(final String word, final int frequency, - final ArrayList<WeightedString> bigramStrings, - final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, - final boolean isBlackListEntry) throws IOException, UnsupportedFormatException; -} diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java deleted file mode 100644 index 4518f21b9..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -/** - * An auxiliary class for updating data associated with SparseTable. - */ -public class SparseTableContentUpdater extends SparseTableContentReader { - protected OutputStream mLookupTableOutStream; - protected OutputStream[] mAddressTableOutStreams; - protected OutputStream[] mContentOutStreams; - - public SparseTableContentUpdater(final String name, final int blockSize, - final File baseDir, final String[] contentFilenames, final String[] contentIds, - final DictionaryBufferFactory factory) { - super(name, blockSize, baseDir, contentFilenames, contentIds, factory); - mAddressTableOutStreams = new OutputStream[mContentCount]; - mContentOutStreams = new OutputStream[mContentCount]; - } - - protected void openStreamsAndBuffers() throws IOException { - openBuffers(); - mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */); - for (int i = 0; i < mContentCount; ++i) { - mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i], - true /* append */); - mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */); - } - } - - /** - * Set the contentIndex-th elements of contentId-th table. - * - * @param contentId the id of the content table. - * @param contentIndex the index where to set the valie. - * @param value the value to set. - */ - protected void setContentValue(final int contentId, final int contentIndex, final int value) - throws IOException { - if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES - >= mLookupTableBuffer.limit()) { - // Need to extend the lookup table - final int currentSize = mLookupTableBuffer.limit() - / SparseTable.SIZE_OF_INT_IN_BYTES; - final int target = contentIndex / mBlockSize + 1; - for (int i = currentSize; i < target; ++i) { - BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream, - SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES); - } - // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in - // Java isn't expanded automatically when the underlying file is expanded. - reopenLookupTable(); - } - - mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); - int posInAddressTable = mLookupTableBuffer.readInt(); - if (posInAddressTable == SparseTable.NOT_EXIST) { - // Need to extend the address table - mLookupTableBuffer.position(mLookupTableBuffer.position() - - SparseTable.SIZE_OF_INT_IN_BYTES); - posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize; - BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer, - posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES); - for (int i = 0; i < mContentCount; ++i) { - for (int j = 0; j < mBlockSize; ++j) { - BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i], - SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES); - } - } - // We need to reopen the byte buffers of the address tables because a MappedByteBuffer - // in Java isn't expanded automatically when the underlying file is expanded. - reopenAddressTables(); - } - posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES; - - mAddressTableBuffers[contentId].position(posInAddressTable); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId], - value, SparseTable.SIZE_OF_INT_IN_BYTES); - } - - private void reopenLookupTable() throws IOException { - mLookupTableOutStream.flush(); - mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile); - } - - private void reopenAddressTables() throws IOException { - for (int i = 0; i < mContentCount; ++i) { - mAddressTableOutStreams[i].flush(); - mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]); - } - } - - protected void close() throws IOException { - mLookupTableOutStream.close(); - for (final OutputStream stream : mAddressTableOutStreams) { - stream.close(); - } - for (final OutputStream stream : mContentOutStreams) { - stream.close(); - } - } -} diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index e9667ab0b..ea0a2c6c2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -23,7 +23,6 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.JniUtils; import android.util.Log; diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 3be62f066..7071893d2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -440,6 +440,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } @Override + @UsedForTesting public void skipPtNode(final FormatOptions formatOptions) { final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); PtNodeReader.readParentAddress(mDictBuffer, formatOptions); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java deleted file mode 100644 index 6298295c6..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java +++ /dev/null @@ -1,794 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import android.util.Log; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; - -/** - * An implementation of DictUpdater for version 4 binary dictionary. - */ -@UsedForTesting -public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { - private static final String TAG = Ver4DictUpdater.class.getSimpleName(); - private static final int MAX_JUMPS = 10000; - - private OutputStream mDictStream; - private final File mFrequencyFile; - - @UsedForTesting - public Ver4DictUpdater(final File dictDirectory, final int factoryType) - throws UnsupportedFormatException { - // DictUpdater must have an updatable DictBuffer. - super(dictDirectory, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY) - ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER); - mFrequencyFile = getFile(FILETYPE_FREQUENCY); - } - - private static class BigramContentUpdater extends SparseTableContentUpdater { - public BigramContentUpdater(final String name, final File baseDir, - final boolean hasTimestamp) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - BigramContentReader.getContentFilenames(name, hasTimestamp), - BigramContentReader.getContentIds(hasTimestamp), - new DictionaryBufferFromWritableByteBufferFactory()); - } - - public void insertBigramEntries(final int terminalId, final int frequency, - final ArrayList<PendingAttribute> entries) throws IOException { - if (terminalId < 0) { - throw new RuntimeException("Invalid terminal id : " + terminalId); - } - openStreamsAndBuffers(); - - if (entries == null || entries.isEmpty()) { - setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, - SparseTable.NOT_EXIST); - return; - } - final int positionOfEntries = - (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length(); - setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries); - - final Iterator<PendingAttribute> bigramIterator = entries.iterator(); - while (bigramIterator.hasNext()) { - final PendingAttribute entry = bigramIterator.next(); - final int flags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), - 0 /* offset */, entry.mFrequency, frequency, "" /* word */); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], flags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], entry.mAddress, - FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); - } - close(); - } - } - - private static class ShortcutContentUpdater extends SparseTableContentUpdater { - public ShortcutContentUpdater(final String name, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, - FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, - new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, - new DictionaryBufferFromWritableByteBufferFactory()); - } - - public void insertShortcuts(final int terminalId, - final ArrayList<WeightedString> shortcuts) throws IOException { - if (terminalId < 0) { - throw new RuntimeException("Invalid terminal id : " + terminalId); - } - openStreamsAndBuffers(); - if (shortcuts == null || shortcuts.isEmpty()) { - setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, - SparseTable.NOT_EXIST); - return; - } - - final int positionOfShortcuts = - (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length(); - setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts); - - final Iterator<WeightedString> shortcutIterator = shortcuts.iterator(); - while (shortcutIterator.hasNext()) { - final WeightedString target = shortcutIterator.next(); - final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( - shortcutIterator.hasNext(), target.mFrequency); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], shortcutFlags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - CharEncoding.writeString(mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], - target.mWord); - } - close(); - } - } - - @Override - public void deleteWord(final String word) throws IOException, UnsupportedFormatException { - if (mDictBuffer == null) { - openDictBuffer(); - readHeader(); - } - final int wordPos = getTerminalPosition(word); - if (wordPos != FormatSpec.NOT_VALID_WORD) { - mDictBuffer.position(wordPos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - mDictBuffer.position(wordPos); - mDictBuffer.put((byte)markAsDeleted(flags)); - } - } - - private int getNewTerminalId() { - // The size of frequency file is FormatSpec.FREQUENCY_AND_FLAGS_SIZE * number of terminals - // because each terminal always has a frequency. - // So we can get a fresh terminal id by this logic. - // CAVEAT: we are reading the file size from the disk each time: beware of race conditions, - // even on one thread. - return (int) (mFrequencyFile.length() / FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - - private void updateParentPosIfNotMoved(final int nodePos, final int newParentPos, - final FormatOptions formatOptions) { - final int originalPos = getPosition(); - setPosition(nodePos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - if (!BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) { - final int parentOffset = newParentPos - nodePos; - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, parentOffset); - } - setPosition(originalPos); - } - - private void updateParentPositions(final int nodeArrayPos, final int newParentPos, - final FormatOptions formatOptions) { - final int originalPos = mDictBuffer.position(); - mDictBuffer.position(nodeArrayPos); - int jumpCount = 0; - do { - final int count = readPtNodeCount(); - for (int i = 0; i < count; ++i) { - updateParentPosIfNotMoved(getPosition(), newParentPos, formatOptions); - skipPtNode(formatOptions); - } - if (!readAndFollowForwardLink()) break; - } while (jumpCount++ < MAX_JUMPS); - setPosition(originalPos); - } - - private void updateChildrenPos(final int nodePos, final int newChildrenPos, - final FormatOptions options) { - final int originalPos = getPosition(); - setPosition(nodePos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - PtNodeReader.readParentAddress(mDictBuffer, options); - BinaryDictIOUtils.skipString(mDictBuffer, - (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer); - final int basePos = getPosition(); - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newChildrenPos - basePos); - setPosition(originalPos); - } - - private void updateTerminalPosition(final int terminalId, final int position) { - if (terminalId == PtNode.NOT_A_TERMINAL - || terminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE - >= mTerminalAddressTableBuffer.limit()) return; - mTerminalAddressTableBuffer.position(terminalId - * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mTerminalAddressTableBuffer, position, - FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - } - - private void updateForwardLink(final int nodeArrayPos, final int newForwardLink, - final FormatOptions formatOptions) { - final int originalPos = getPosition(); - setPosition(nodeArrayPos); - int jumpCount = 0; - while (jumpCount++ < MAX_JUMPS) { - final int ptNodeCount = readPtNodeCount(); - for (int i = 0; i < ptNodeCount; ++i) { - skipPtNode(formatOptions); - } - final int forwardLinkPos = getPosition(); - if (!readAndFollowForwardLink()) { - setPosition(forwardLinkPos); - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newForwardLink - forwardLinkPos); - break; - } - } - setPosition(originalPos); - } - - private void markPtNodeAsMoved(final int nodePos, final int newNodePos, - final FormatOptions options) { - final int originalPos = getPosition(); - updateParentPosIfNotMoved(nodePos, newNodePos, options); - setPosition(nodePos); - final int currentFlags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - setPosition(nodePos); - mDictBuffer.put((byte) (FormatSpec.FLAG_IS_MOVED - | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); - final int offset = newNodePos - nodePos; - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, offset); - setPosition(originalPos); - } - - /** - * Writes a PtNode to an output stream from a Ver4PtNodeInfo. - * - * @param nodePos the position of the head of the PtNode. - * @param info the PtNode info to be written. - * @return the size written, in bytes. - */ - private int writePtNode(final int nodePos, final Ver4PtNodeInfo info) throws IOException { - int written = 0; - - // Write flags. - mDictStream.write((byte) (info.mFlags & 0xFF)); - written += FormatSpec.PTNODE_FLAGS_SIZE; - - // Write the parent position. - final int parentOffset = info.mParentPos == FormatSpec.NO_PARENT_ADDRESS ? - FormatSpec.NO_PARENT_ADDRESS : info.mParentPos - nodePos; - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, parentOffset); - written += FormatSpec.PARENT_ADDRESS_SIZE; - - // Write a string. - if (((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) - != (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters > 1)) { - throw new RuntimeException("Inconsistent flags : hasMultipleChars = " - + ((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) + ", length = " - + (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters)); - } - written += CharEncoding.writeCodePoints(mDictStream, info.mCharacters, - info.mStartIndexOfCharacters, info.mEndIndexOfCharacters); - - // Write the terminal id. - if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { - BinaryDictEncoderUtils.writeUIntToStream(mDictStream, info.mTerminalId, - FormatSpec.PTNODE_TERMINAL_ID_SIZE); - written += FormatSpec.PTNODE_TERMINAL_ID_SIZE; - } - - // Write the children position. - final int childrenOffset = info.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS - ? 0 : info.mChildrenPos - (nodePos + written); - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, childrenOffset); - written += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - - return written; - } - - /** - * Helper method to split and move PtNode. - * - * @param ptNodeArrayPos the position of PtNodeArray which contains the split and moved PtNode. - * @param splittedPtNodeToMovePos the position of the split and moved PtNode. - * @param newParent the parent PtNode after splitting. - * @param newChildren the children PtNodes after splitting. - * @param newParentStartPos where to write the new parent. - * @param formatOptions the format options. - */ - private void writeSplittedPtNodes(final int ptNodeArrayPos, final int splittedPtNodeToMovePos, - final Ver4PtNodeInfo newParent, final Ver4PtNodeInfo[] newChildren, - final int newParentStartPos, - final FormatOptions formatOptions) throws IOException { - updateTerminalPosition(newParent.mTerminalId, - newParentStartPos + 1 /* size of PtNodeCount */); - int written = writePtNodeArray(newParentStartPos, new Ver4PtNodeInfo[] { newParent }, - FormatSpec.NO_FORWARD_LINK_ADDRESS); - final int childrenStartPos = newParentStartPos + written; - writePtNodeArray(childrenStartPos, newChildren, FormatSpec.NO_FORWARD_LINK_ADDRESS); - int childrenNodePos = childrenStartPos + 1 /* size of PtNodeCount */; - for (final Ver4PtNodeInfo info : newChildren) { - updateTerminalPosition(info.mTerminalId, childrenNodePos); - childrenNodePos += computePtNodeSize(info.mCharacters, info.mStartIndexOfCharacters, - info.mEndIndexOfCharacters, - (info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0); - } - - // Mark as moved. - markPtNodeAsMoved(splittedPtNodeToMovePos, newParentStartPos + 1 /* size of PtNodeCount */, - formatOptions); - updateForwardLink(ptNodeArrayPos, newParentStartPos, formatOptions); - } - - /** - * Writes a node array to the stream. - * - * @param nodeArrayPos the position of the head of the node array. - * @param infos an array of Ver4PtNodeInfo to be written. - * @return the written length in bytes. - */ - private int writePtNodeArray(final int nodeArrayPos, final Ver4PtNodeInfo[] infos, - final int forwardLink) throws IOException { - int written = BinaryDictIOUtils.writePtNodeCount(mDictStream, infos.length); - for (int i = 0; i < infos.length; ++i) { - written += writePtNode(nodeArrayPos + written, infos[i]); - } - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, forwardLink); - written += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - return written; - } - - private int computePtNodeSize(final int[] codePoints, final int startIndex, final int endIndex, - final boolean isTerminal) { - return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + CharEncoding.getCharArraySize(codePoints, startIndex, endIndex) - + (endIndex - startIndex > 1 ? FormatSpec.PTNODE_TERMINATOR_SIZE : 0) - + (isTerminal ? FormatSpec.PTNODE_TERMINAL_ID_SIZE : 0) - + FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - } - - private void writeNewSinglePtNodeWithAttributes(final int[] codePoints, - final boolean hasShortcuts, final int terminalId, final boolean hasBigrams, - final boolean isNotAWord, final boolean isBlackListEntry, final int parentPos, - final FormatOptions formatOptions) throws IOException { - final int newNodeArrayPos = mDictBuffer.limit(); - final int newNodeFlags = BinaryDictEncoderUtils.makePtNodeFlags(codePoints.length > 1, - terminalId != PtNode.NOT_A_TERMINAL, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, - hasBigrams, isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo info = new Ver4PtNodeInfo(newNodeFlags, codePoints, terminalId, - FormatSpec.NO_CHILDREN_ADDRESS, parentPos, 0 /* nodeSize */); - writePtNodeArray(newNodeArrayPos, new Ver4PtNodeInfo[] { info }, - FormatSpec.NO_FORWARD_LINK_ADDRESS); - } - - private int setMultipleCharsInFlags(final int currentFlags, final boolean hasMultipleChars) { - final int flags; - if (hasMultipleChars) { - flags = currentFlags | FormatSpec.FLAG_HAS_MULTIPLE_CHARS; - } else { - flags = currentFlags & (~FormatSpec.FLAG_HAS_MULTIPLE_CHARS); - } - return flags; - } - - private int setIsNotAWordInFlags(final int currentFlags, final boolean isNotAWord) { - final int flags; - if (isNotAWord) { - flags = currentFlags | FormatSpec.FLAG_IS_NOT_A_WORD; - } else { - flags = currentFlags & (~FormatSpec.FLAG_IS_NOT_A_WORD); - } - return flags; - } - - private int setIsBlackListEntryInFlags(final int currentFlags, final boolean isBlackListEntry) { - final int flags; - if (isBlackListEntry) { - flags = currentFlags | FormatSpec.FLAG_IS_BLACKLISTED; - } else { - flags = currentFlags & (~FormatSpec.FLAG_IS_BLACKLISTED); - } - return flags; - } - - /** - * Splits a PtNode. - * - * abcd - ef - * - * -> inserting "abc" - * - * abc - d - ef - * - * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split. - * @param nodeToSplitPos the position of the PtNode to split. - * @param nodeToSplitInfo the information of the PtNode to split. - * @param indexToSplit the index where to split in the code points array. - * @param parentOfNodeToSplitPos the absolute position of a parent of the node to split. - * @param newTerminalId the terminal id of the inserted node (corresponds to "d"). - * @param hasShortcuts whether the inserted word should have shortcuts. - * @param hasBigrams whether the inserted word should have bigrams. - * @param isNotAWord whether the inserted word should be not a word. - * @param isBlackListEntry whether the inserted word should be a black list entry. - * @param formatOptions the format options. - */ - private void splitOnly(final int nodeArrayToSplitPos, final int nodeToSplitPos, - final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit, - final int parentOfNodeToSplitPos, final int newTerminalId, final boolean hasShortcuts, - final boolean hasBigrams, final boolean isNotAWord, final boolean isBlackListEntry, - final FormatOptions formatOptions) throws IOException { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(indexToSplit > 1, - true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags, - nodeToSplitInfo.mCharacters, newTerminalId, parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, true) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, - parentOfNodeToSplitPos, 0 /* nodeSize */); - parentInfo.mStartIndexOfCharacters = 0; - parentInfo.mEndIndexOfCharacters = indexToSplit; - - // Write the child. - final int childrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags, - nodeToSplitInfo.mCharacters.length - indexToSplit > 1); - final Ver4PtNodeInfo childrenInfo = new Ver4PtNodeInfo(childrenFlags, - nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId, - nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */); - childrenInfo.mStartIndexOfCharacters = indexToSplit; - childrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length; - if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentPositions(nodeToSplitInfo.mChildrenPos, - parentInfo.mChildrenPos + 1 /* size of PtNodeCount */, formatOptions); - } - - writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo, - new Ver4PtNodeInfo[] { childrenInfo }, parentNodeArrayStartPos, formatOptions); - } - - /** - * Split and branch a PtNode. - * - * ab - cd - * - * -> inserting "ac" - * - * a - b - cd - * | - * - c - * - * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split. - * @param nodeToSplitPos the position of the PtNode to split. - * @param nodeToSplitInfo the information of the PtNode to split. - * @param indexToSplit the index where to split in the code points array. - * @param parentOfNodeToSplitPos the absolute position of parent of the node to split. - * @param newWordSuffixCodePoints the suffix of the newly inserted word (corresponds to "c"). - * @param startIndexOfNewWordSuffixCodePoints the start index in newWordSuffixCodePoints where - * the suffix starts. - * @param newTerminalId the terminal id of the inserted node (correspond to "c"). - * @param hasShortcuts whether the inserted word should have shortcuts. - * @param hasBigrams whether the inserted word should have bigrams. - * @param isNotAWord whether the inserted word should be not a word. - * @param isBlackListEntry whether the inserted word should be a black list entry. - * @param formatOptions the format options. - */ - private void splitAndBranch(final int nodeArrayToSplitPos, final int nodeToSplitPos, - final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit, - final int parentOfNodeToSplitPos, final int[] newWordSuffixCodePoints, - final int startIndexOfNewWordSuffixCodePoints, - final int newTerminalId, - final boolean hasShortcuts, final boolean hasBigrams, final boolean isNotAWord, - final boolean isBlackListEntry, final FormatOptions formatOptions) throws IOException { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags( - indexToSplit > 1, - false /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, - false /* hasShortcut */, false /* hasBigrams */, - false /* isNotAWord */, false /* isBlackListEntry */, formatOptions); - final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags, - nodeToSplitInfo.mCharacters, PtNode.NOT_A_TERMINAL, - parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, - parentOfNodeToSplitPos, 0 /* nodeSize */); - parentInfo.mStartIndexOfCharacters = 0; - parentInfo.mEndIndexOfCharacters = indexToSplit; - - final int childrenNodeArrayStartPos = parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - final int firstChildrenFlags = BinaryDictEncoderUtils.makePtNodeFlags( - newWordSuffixCodePoints.length - startIndexOfNewWordSuffixCodePoints > 1, - true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo firstChildrenInfo = new Ver4PtNodeInfo(firstChildrenFlags, - newWordSuffixCodePoints, newTerminalId, - FormatSpec.NO_CHILDREN_ADDRESS, parentNodeStartPos, - 0 /* nodeSize */); - firstChildrenInfo.mStartIndexOfCharacters = startIndexOfNewWordSuffixCodePoints; - firstChildrenInfo.mEndIndexOfCharacters = newWordSuffixCodePoints.length; - - final int secondChildrenStartPos = childrenNodeArrayStartPos + 1 /* size of ptNodeCount */ - + computePtNodeSize(newWordSuffixCodePoints, startIndexOfNewWordSuffixCodePoints, - newWordSuffixCodePoints.length, true /* isTerminal */); - final int secondChildrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags, - nodeToSplitInfo.mCharacters.length - indexToSplit > 1); - final Ver4PtNodeInfo secondChildrenInfo = new Ver4PtNodeInfo(secondChildrenFlags, - nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId, - nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */); - secondChildrenInfo.mStartIndexOfCharacters = indexToSplit; - secondChildrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length; - if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentPositions(nodeToSplitInfo.mChildrenPos, secondChildrenStartPos, - formatOptions); - } - - writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo, - new Ver4PtNodeInfo[] { firstChildrenInfo, secondChildrenInfo }, - parentNodeArrayStartPos, formatOptions); - } - - /** - * Inserts a word into the trie file and returns the position of inserted terminal node. - * If the insertion is failed, returns FormatSpec.NOT_VALID_WORD. - */ - @UsedForTesting - private int insertWordToTrie(final String word, final int newTerminalId, - final boolean isNotAWord, final boolean isBlackListEntry, final boolean hasBigrams, - final boolean hasShortcuts) throws IOException, UnsupportedFormatException { - setPosition(0); - final FileHeader header = readHeader(); - - final int[] codePoints = FusionDictionary.getCodePoints(word); - final int wordLen = codePoints.length; - - int wordPos = 0; - for (int depth = 0; depth < FormatSpec.MAX_WORD_LENGTH; /* nop */) { - final int nodeArrayPos = getPosition(); - final int ptNodeCount = readPtNodeCount(); - boolean goToChildren = false; - int parentPos = FormatSpec.NO_PARENT_ADDRESS; - for (int i = 0; i < ptNodeCount; ++i) { - final int nodePos = getPosition(); - final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(nodePos, header.mFormatOptions); - if (BinaryDictIOUtils.isMovedPtNode(nodeInfo.mFlags, header.mFormatOptions)) { - continue; - } - if (nodeInfo.mParentPos != FormatSpec.NO_PARENT_ADDRESS) { - parentPos = nodePos + nodeInfo.mParentPos; - } - - final boolean firstCharacterMatched = - codePoints[wordPos] == nodeInfo.mCharacters[0]; - boolean allCharactersMatched = true; - int firstDifferentCharacterIndex = -1; - for (int p = 0; p < nodeInfo.mCharacters.length; ++p) { - if (wordPos + p >= codePoints.length) break; - if (codePoints[wordPos + p] != nodeInfo.mCharacters[p]) { - if (firstDifferentCharacterIndex == -1) { - firstDifferentCharacterIndex = p; - } - allCharactersMatched = false; - } - } - - if (!firstCharacterMatched) { - // Go to the next sibling node. - continue; - } - - if (!allCharactersMatched) { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - splitAndBranch(nodeArrayPos, nodePos, nodeInfo, firstDifferentCharacterIndex, - parentPos, codePoints, wordPos + firstDifferentCharacterIndex, - newTerminalId, hasShortcuts, hasBigrams, isNotAWord, - isBlackListEntry, header.mFormatOptions); - - return parentNodeArrayStartPos + computePtNodeSize(codePoints, wordPos, - wordPos + firstDifferentCharacterIndex, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE + 1 /* size of PtNodeCount */; - } - - if (wordLen - wordPos < nodeInfo.mCharacters.length) { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - splitOnly(nodeArrayPos, nodePos, nodeInfo, wordLen - wordPos, parentPos, - newTerminalId, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, - header.mFormatOptions); - - // Return the position of the inserted word. - return parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - } - - wordPos += nodeInfo.mCharacters.length; - if (wordPos == wordLen) { - // This dictionary already contains the word. - Log.e(TAG, "Something went wrong. If the word is already contained, " - + " there is no need to insert new PtNode."); - return FormatSpec.NOT_VALID_WORD; - } - if (nodeInfo.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS) { - // There are no children. - // We need to add a new node as a child of this node. - final int newNodeArrayPos = mDictBuffer.limit(); - final int[] newNodeCodePoints = Arrays.copyOfRange(codePoints, wordPos, - codePoints.length); - writeNewSinglePtNodeWithAttributes(newNodeCodePoints, hasShortcuts, - newTerminalId, hasBigrams, isNotAWord, isBlackListEntry, nodePos, - header.mFormatOptions); - updateChildrenPos(nodePos, newNodeArrayPos, header.mFormatOptions); - return newNodeArrayPos + 1 /* size of PtNodeCount */; - } else { - // Found the matched node. - // Go to the children of this node. - setPosition(nodeInfo.mChildrenPos); - goToChildren = true; - depth++; - break; - } - } - - if (goToChildren) continue; - if (!readAndFollowForwardLink()) { - // Add a new node that contains [wordPos, word.length()-1]. - // and update the forward link. - final int newNodeArrayPos = mDictBuffer.limit(); - final int[] newCodePoints = Arrays.copyOfRange(codePoints, wordPos, - codePoints.length); - writeNewSinglePtNodeWithAttributes(newCodePoints, hasShortcuts, newTerminalId, - hasBigrams, isNotAWord, isBlackListEntry, parentPos, header.mFormatOptions); - updateForwardLink(nodeArrayPos, newNodeArrayPos, header.mFormatOptions); - return newNodeArrayPos + 1 /* size of PtNodeCount */; - } - } - return FormatSpec.NOT_VALID_WORD; - } - - private void updateFrequency(final int terminalId, final int frequency) { - mFrequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mFrequencyBuffer, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - - private void insertFrequency(final int frequency) throws IOException { - final OutputStream frequencyStream = new FileOutputStream(mFrequencyFile, - true /* append */); - BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - frequencyStream.close(); - } - - private void insertTerminalPosition(final int posOfTerminal) throws IOException, - UnsupportedFormatException { - final OutputStream terminalPosStream = new FileOutputStream( - getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */); - BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal, - FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - terminalPosStream.close(); - } - - private void insertBigrams(final int terminalId, final int frequency, - final ArrayList<PendingAttribute> bigramAddresses) - throws IOException, UnsupportedFormatException { - openDictBuffer(); - final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(), - mDictDirectory, false); - - // Convert addresses to terminal ids. - final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); - mDictBuffer.position(0); - final FileHeader header = readHeader(); - for (PendingAttribute attr : bigramAddresses) { - mDictBuffer.position(attr.mAddress); - final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions); - if (info.mTerminalId == PtNode.NOT_A_TERMINAL) { - throw new RuntimeException("We can't have a bigram target that's not a terminal."); - } - bigrams.add(new PendingAttribute(frequency, info.mTerminalId)); - } - updater.insertBigramEntries(terminalId, frequency, bigrams); - close(); - } - - private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts) - throws IOException { - final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(), - mDictDirectory); - updater.insertShortcuts(terminalId, shortcuts); - } - - private void openBuffersAndStream() throws IOException, UnsupportedFormatException { - openDictBuffer(); - mDictStream = new FileOutputStream(getFile(FILETYPE_TRIE), true /* append */); - } - - private void close() throws IOException { - if (mDictStream != null) { - mDictStream.close(); - mDictStream = null; - } - mDictBuffer = null; - mFrequencyBuffer = null; - mTerminalAddressTableBuffer = null; - } - - private void updateAttributes(final int posOfWord, final int frequency, - final ArrayList<WeightedString> bigramStrings, - final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, - final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { - mDictBuffer.position(0); - final FileHeader header = readHeader(); - mDictBuffer.position(posOfWord); - final Ver4PtNodeInfo info = readVer4PtNodeInfo(posOfWord, header.mFormatOptions); - final int terminalId = info.mTerminalId; - - // Update the flags. - final int newFlags = setIsNotAWordInFlags( - setIsBlackListEntryInFlags(info.mFlags, isBlackListEntry), isNotAWord); - mDictBuffer.position(posOfWord); - mDictBuffer.put((byte) newFlags); - - updateFrequency(terminalId, frequency); - insertBigrams(terminalId, frequency, resolveBigramPositions(this, bigramStrings)); - insertShortcuts(terminalId, shortcuts); - } - - @Override @UsedForTesting - public void insertWord(final String word, final int frequency, - final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts, - final boolean isNotAWord, final boolean isBlackListEntry) - throws IOException, UnsupportedFormatException { - final int newTerminalId = getNewTerminalId(); - - openBuffersAndStream(); - final int posOfWord = getTerminalPosition(word); - if (posOfWord != FormatSpec.NOT_VALID_WORD) { - // The word is already contained in the dictionary. - updateAttributes(posOfWord, frequency, bigramStrings, shortcuts, isNotAWord, - isBlackListEntry); - close(); - return; - } - - // Insert new PtNode into trie. - final int posOfTerminal = insertWordToTrie(word, newTerminalId, isNotAWord, - isBlackListEntry, bigramStrings != null && !bigramStrings.isEmpty(), - shortcuts != null && !shortcuts.isEmpty()); - insertFrequency(frequency); - insertTerminalPosition(posOfTerminal); - close(); - - insertBigrams(newTerminalId, frequency, resolveBigramPositions(this, bigramStrings)); - insertShortcuts(newTerminalId, shortcuts); - } - - /** - * Converts a list of WeightedString to a list of PendingAttribute. - */ - private static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater, - final ArrayList<WeightedString> bigramStrings) - throws IOException, UnsupportedFormatException { - if (bigramStrings == null) return CollectionUtils.newArrayList(); - final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); - for (final WeightedString bigram : bigramStrings) { - final int pos = dictUpdater.getTerminalPosition(bigram.mWord); - if (pos == FormatSpec.NOT_VALID_WORD) { - // TODO: figure out what is the correct thing to do here. - } else { - bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); - } - } - return bigrams; - } - - private static int markAsDeleted(final int flags) { - return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED; - } -} |