diff options
author | 2014-06-09 14:51:17 +0900 | |
---|---|---|
committer | 2014-06-09 19:38:03 +0900 | |
commit | 7a6bc607ca0fe209cfc2f2c38575dc868496fd79 (patch) | |
tree | 81ead4b5bc8005225082aae806f1374cfed863fb /java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java | |
parent | a2a4ee276531f3f0455bc5099ad17cf12b580b72 (diff) | |
download | latinime-7a6bc607ca0fe209cfc2f2c38575dc868496fd79.tar.gz latinime-7a6bc607ca0fe209cfc2f2c38575dc868496fd79.tar.xz latinime-7a6bc607ca0fe209cfc2f2c38575dc868496fd79.zip |
Split the sentences into words ourselves
The framework's default split is not suitable for all
languages. Also it does not perform very well when space
is mistyped as period.
Bug: 9063355
Bug: 10780091
Change-Id: I400d790ff1c29f221697fd94d79bbf67c61c7b8a
Diffstat (limited to 'java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java b/java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java new file mode 100644 index 000000000..13352f39e --- /dev/null +++ b/java/src/com/android/inputmethod/latin/spellcheck/SentenceLevelAdapter.java @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.spellcheck; + +import android.content.res.Resources; +import android.view.textservice.SentenceSuggestionsInfo; +import android.view.textservice.SuggestionsInfo; +import android.view.textservice.TextInfo; + +import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import com.android.inputmethod.latin.utils.RunInLocale; + +import java.util.ArrayList; +import java.util.Locale; + +/** + * This code is mostly lifted directly from android.service.textservice.SpellCheckerService in + * the framework; maybe that should be protected instead, so that implementers don't have to + * rewrite everything for any small change. + */ +public class SentenceLevelAdapter { + public static final SentenceSuggestionsInfo[] EMPTY_SENTENCE_SUGGESTIONS_INFOS = + new SentenceSuggestionsInfo[] {}; + private static final SuggestionsInfo EMPTY_SUGGESTIONS_INFO = new SuggestionsInfo(0, null); + /** + * Container for split TextInfo parameters + */ + public static class SentenceWordItem { + public final TextInfo mTextInfo; + public final int mStart; + public final int mLength; + public SentenceWordItem(TextInfo ti, int start, int end) { + mTextInfo = ti; + mStart = start; + mLength = end - start; + } + } + + /** + * Container for originally queried TextInfo and parameters + */ + public static class SentenceTextInfoParams { + final TextInfo mOriginalTextInfo; + final ArrayList<SentenceWordItem> mItems; + final int mSize; + public SentenceTextInfoParams(TextInfo ti, ArrayList<SentenceWordItem> items) { + mOriginalTextInfo = ti; + mItems = items; + mSize = items.size(); + } + } + + private static class WordIterator { + private final SpacingAndPunctuations mSpacingAndPunctuations; + public WordIterator(final Resources res, final Locale locale) { + final RunInLocale<SpacingAndPunctuations> job + = new RunInLocale<SpacingAndPunctuations>() { + @Override + protected SpacingAndPunctuations job(final Resources res) { + return new SpacingAndPunctuations(res); + } + }; + mSpacingAndPunctuations = job.runInLocale(res, locale); + } + + public int getEndOfWord(final CharSequence sequence, int index) { + final int length = sequence.length(); + index = index < 0 ? 0 : Character.offsetByCodePoints(sequence, index, 1); + while (index < length) { + final int codePoint = Character.codePointAt(sequence, index); + if (mSpacingAndPunctuations.isWordSeparator(codePoint)) { + // If it's a period, we want to stop here only if it's followed by another + // word separator. In all other cases we stop here. + if (Constants.CODE_PERIOD == codePoint) { + final int indexOfNextCodePoint = + index + Character.charCount(Constants.CODE_PERIOD); + if (indexOfNextCodePoint < length + && mSpacingAndPunctuations.isWordSeparator( + Character.codePointAt(sequence, indexOfNextCodePoint))) { + return index; + } + } else { + return index; + } + } + index += Character.charCount(codePoint); + } + return index; + } + + public int getBeginningOfNextWord(final CharSequence sequence, int index) { + final int length = sequence.length(); + if (index >= length) { + return -1; + } + index = index < 0 ? 0 : Character.offsetByCodePoints(sequence, index, 1); + while (index < length) { + final int codePoint = Character.codePointAt(sequence, index); + if (!mSpacingAndPunctuations.isWordSeparator(codePoint)) { + return index; + } + index += Character.charCount(codePoint); + } + return -1; + } + } + + private final WordIterator mWordIterator; + public SentenceLevelAdapter(final Resources res, final Locale locale) { + mWordIterator = new WordIterator(res, locale); + } + + public SentenceTextInfoParams getSplitWords(TextInfo originalTextInfo) { + final WordIterator wordIterator = mWordIterator; + final CharSequence originalText = originalTextInfo.getText(); + final int cookie = originalTextInfo.getCookie(); + final int start = -1; + final int end = originalText.length(); + final ArrayList<SentenceWordItem> wordItems = new ArrayList<SentenceWordItem>(); + int wordStart = wordIterator.getBeginningOfNextWord(originalText, start); + int wordEnd = wordIterator.getEndOfWord(originalText, wordStart); + while (wordStart <= end && wordEnd != -1 && wordStart != -1) { + if (wordEnd >= start && wordEnd > wordStart) { + final String query = originalText.subSequence(wordStart, wordEnd).toString(); + final TextInfo ti = new TextInfo(query, cookie, query.hashCode()); + wordItems.add(new SentenceWordItem(ti, wordStart, wordEnd)); + } + wordStart = wordIterator.getBeginningOfNextWord(originalText, wordEnd); + if (wordStart == -1) { + break; + } + wordEnd = wordIterator.getEndOfWord(originalText, wordStart); + } + return new SentenceTextInfoParams(originalTextInfo, wordItems); + } + + public static SentenceSuggestionsInfo reconstructSuggestions( + SentenceTextInfoParams originalTextInfoParams, SuggestionsInfo[] results) { + if (results == null || results.length == 0) { + return null; + } + if (originalTextInfoParams == null) { + return null; + } + final int originalCookie = originalTextInfoParams.mOriginalTextInfo.getCookie(); + final int originalSequence = + originalTextInfoParams.mOriginalTextInfo.getSequence(); + + final int querySize = originalTextInfoParams.mSize; + final int[] offsets = new int[querySize]; + final int[] lengths = new int[querySize]; + final SuggestionsInfo[] reconstructedSuggestions = new SuggestionsInfo[querySize]; + for (int i = 0; i < querySize; ++i) { + final SentenceWordItem item = originalTextInfoParams.mItems.get(i); + SuggestionsInfo result = null; + for (int j = 0; j < results.length; ++j) { + final SuggestionsInfo cur = results[j]; + if (cur != null && cur.getSequence() == item.mTextInfo.getSequence()) { + result = cur; + result.setCookieAndSequence(originalCookie, originalSequence); + break; + } + } + offsets[i] = item.mStart; + lengths[i] = item.mLength; + reconstructedSuggestions[i] = result != null ? result : EMPTY_SUGGESTIONS_INFO; + } + return new SentenceSuggestionsInfo(reconstructedSuggestions, offsets, lengths); + } +} |