diff options
9 files changed, 208 insertions, 19 deletions
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index fa7f23efb..925381b50 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -607,8 +607,11 @@ public final class RichInputConnection { } } - return new TextRange(TextUtils.concat(before, after), startIndexInBefore, - before.length() + endIndexInAfter, before.length()); + // We don't use TextUtils#concat because it copies all spans without respect to their + // nature. If the text includes a PARAGRAPH span and it has been split, then + // TextUtils#concat will crash when it tries to concat both sides of it. + return new TextRange(StringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after), + startIndexInBefore, before.length() + endIndexInAfter, before.length()); } public boolean isCursorTouchingWord(final SettingsValues settingsValues) { diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java index 121aecf0f..327780ad0 100644 --- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java @@ -20,7 +20,12 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.settings.SettingsValues; +import android.text.Spannable; +import android.text.SpannableString; +import android.text.Spanned; +import android.text.SpannedString; import android.text.TextUtils; +import android.text.style.SuggestionSpan; import android.util.JsonReader; import android.util.JsonWriter; import android.util.Log; @@ -462,4 +467,88 @@ public final class StringUtils { } return ""; } + + /** + * Copies the spans from the region <code>start...end</code> in + * <code>source</code> to the region + * <code>destoff...destoff+end-start</code> in <code>dest</code>. + * Spans in <code>source</code> that begin before <code>start</code> + * or end after <code>end</code> but overlap this range are trimmed + * as if they began at <code>start</code> or ended at <code>end</code>. + * Only SuggestionSpans that don't have the SPAN_PARAGRAPH span are copied. + * + * This code is almost entirely taken from {@link TextUtils#copySpansFrom}, except for the + * kind of span that is copied. + * + * @throws IndexOutOfBoundsException if any of the copied spans + * are out of range in <code>dest</code>. + */ + public static void copyNonParagraphSuggestionSpansFrom(Spanned source, int start, int end, + Spannable dest, int destoff) { + Object[] spans = source.getSpans(start, end, SuggestionSpan.class); + + for (int i = 0; i < spans.length; i++) { + int fl = source.getSpanFlags(spans[i]); + if (0 != (fl & Spannable.SPAN_PARAGRAPH)) continue; + + int st = source.getSpanStart(spans[i]); + int en = source.getSpanEnd(spans[i]); + + if (st < start) + st = start; + if (en > end) + en = end; + + dest.setSpan(spans[i], st - start + destoff, en - start + destoff, + fl); + } + } + + /** + * Returns a CharSequence concatenating the specified CharSequences, retaining their + * SuggestionSpans that don't have the PARAGRAPH flag, but not other spans. + * + * This code is almost entirely taken from {@link TextUtils#concat(CharSequence...)}, except + * it calls copyNonParagraphSuggestionSpansFrom instead of {@link TextUtils#copySpansFrom}. + */ + public static CharSequence concatWithNonParagraphSuggestionSpansOnly(CharSequence... text) { + if (text.length == 0) { + return ""; + } + + if (text.length == 1) { + return text[0]; + } + + boolean spanned = false; + for (int i = 0; i < text.length; i++) { + if (text[i] instanceof Spanned) { + spanned = true; + break; + } + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < text.length; i++) { + sb.append(text[i]); + } + + if (!spanned) { + return sb.toString(); + } + + SpannableString ss = new SpannableString(sb); + int off = 0; + for (int i = 0; i < text.length; i++) { + int len = text[i].length(); + + if (text[i] instanceof Spanned) { + copyNonParagraphSuggestionSpansFrom((Spanned) text[i], 0, len, ss, off); + } + + off += len; + } + + return new SpannedString(ss); + } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 051da1be3..42397c19e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const { AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } - // TODO: Implement. - return false; + // TODO: Implement more properly. + return mBufferWithExtendableBuffer.isNearSizeLimit(); } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index d2b9a5e97..a51ae5e1d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -35,6 +35,8 @@ namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp"; +// TODO: Make MAX_DICTIONARY_SIZE 8MB. +const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, @@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) { return; } - BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, + MAX_DICTIONARY_SIZE); if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { return; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index d8058cc4e..028fa6075 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper { static const int CHILDREN_POSITION_FIELD_SIZE; static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 0fed275e9..f692882f2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,9 +18,10 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024; const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; -const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024; +const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; +// TODO: Needs to allocate larger memory corresponding to the current vector size. +const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { @@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co return true; } +bool BufferWithExtendableBuffer::extendBuffer() { + const size_t sizeAfterExtending = + mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; + if (sizeAfterExtending > mMaxAdditionalBufferSize) { + return false; + } + mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP); + return true; +} + bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) { if (isInAdditionalBuffer(pos)) { const int tailPosition = getTailPosition(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index c6a484131..17d2e39c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,9 +32,11 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. class BufferWithExtendableBuffer { public: - BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize) + BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, + const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), - mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {} + mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), + mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; @@ -61,6 +63,11 @@ class BufferWithExtendableBuffer { return mOriginalBufferSize; } + AK_FORCE_INLINE bool isNearSizeLimit() const { + return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize + * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100); + } + /** * For writing. * @@ -75,28 +82,22 @@ class BufferWithExtendableBuffer { private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); - static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE; static const size_t MAX_ADDITIONAL_BUFFER_SIZE; + static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; uint8_t *const mOriginalBuffer; const int mOriginalBufferSize; std::vector<uint8_t> mAdditionalBuffer; int mUsedAdditionalBufferSize; + const size_t mMaxAdditionalBufferSize; // Return if the buffer is successfully extended or not. - AK_FORCE_INLINE bool extendBuffer() { - if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP - > MAX_ADDITIONAL_BUFFER_SIZE) { - return false; - } - mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP); - return true; - } + bool extendBuffer(); // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of // the additional buffer, try extending the buffer. - AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size); + bool checkAndPrepareWriting(const int pos, const int size); }; } #endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */ diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index e26b300a8..96a2217a3 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testAddManyUnigramsAndFlushWithGC() { + final int flashWithGCIterationCount = 3; + final int codePointSetSize = 50; + final int seed = 22360679; + + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + BinaryDictionary binaryDictionary; + for (int i = 0; i < flashWithGCIterationCount; i++) { + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + while(!binaryDictionary.needsToRunGC()) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + + for (int j = 0; j < words.size(); j++) { + final String word = words.get(j); + final int unigramProbability = unigramProbabilities.get(word); + assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); + } + + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + } + + dictFile.delete(); + } } diff --git a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java index 4e396a1cf..eb9fb984e 100644 --- a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java @@ -20,6 +20,11 @@ import com.android.inputmethod.latin.settings.SettingsValues; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.SmallTest; +import android.text.style.SuggestionSpan; +import android.text.style.URLSpan; +import android.text.SpannableStringBuilder; +import android.text.Spannable; +import android.text.Spanned; import java.util.Arrays; import java.util.List; @@ -280,4 +285,34 @@ public class StringUtilsTests extends AndroidTestCase { assertEquals(objs[i], newObjArray.get(i)); } } + + public void testConcatWithSuggestionSpansOnly() { + SpannableStringBuilder s = new SpannableStringBuilder("test string\ntest string\n" + + "test string\ntest string\ntest string\ntest string\ntest string\ntest string\n" + + "test string\ntest string\n"); + final int N = 10; + for (int i = 0; i < N; ++i) { + // Put a PARAGRAPH-flagged span that should not be found in the result. + s.setSpan(new SuggestionSpan(getContext(), + new String[] {"" + i}, Spannable.SPAN_PARAGRAPH), + i * 12, i * 12 + 12, Spannable.SPAN_PARAGRAPH); + // Put a normal suggestion span that should be found in the result. + s.setSpan(new SuggestionSpan(getContext(), new String[] {"" + i}, 0), i, i * 2, 0); + // Put a URL span than should not be found in the result. + s.setSpan(new URLSpan("http://a"), i, i * 2, 0); + } + + final CharSequence a = s.subSequence(0, 15); + final CharSequence b = s.subSequence(15, s.length()); + final Spanned result = + (Spanned)StringUtils.concatWithNonParagraphSuggestionSpansOnly(a, b); + + Object[] spans = result.getSpans(0, result.length(), SuggestionSpan.class); + for (int i = 0; i < spans.length; i++) { + final int flags = result.getSpanFlags(spans[i]); + assertEquals("Should not find a span with PARAGRAPH flag", + flags & Spannable.SPAN_PARAGRAPH, 0); + assertTrue("Should be a SuggestionSpan", spans[i] instanceof SuggestionSpan); + } + } } |