diff options
20 files changed, 371 insertions, 68 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java index 0db63fd9f..0dbc7c858 100644 --- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java @@ -24,6 +24,7 @@ import com.android.inputmethod.latin.common.Constants; import com.android.inputmethod.latin.common.StringUtils; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import java.util.ArrayList; import java.util.Locale; public final class CapsModeUtils { @@ -326,4 +327,31 @@ public final class CapsModeUtils { // Here we arrived at the start of the line. This should behave exactly like whitespace. return (START == state || LETTER == state) ? noCaps : caps; } + + /** + * Convert capitalize mode flags into human readable text. + * + * @param capsFlags The modes flags to be converted. It may be any combination of + * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and + * {@link TextUtils#CAP_MODE_SENTENCES}. + * @return the text that describe the <code>capsMode</code>. + */ + public static String flagsToString(final int capsFlags) { + final int capsFlagsMask = TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS + | TextUtils.CAP_MODE_SENTENCES; + if ((capsFlags & ~capsFlagsMask) != 0) { + return "unknown<0x" + Integer.toHexString(capsFlags) + ">"; + } + final ArrayList<String> builder = new ArrayList<>(); + if ((capsFlags & android.text.TextUtils.CAP_MODE_CHARACTERS) != 0) { + builder.add("characters"); + } + if ((capsFlags & android.text.TextUtils.CAP_MODE_WORDS) != 0) { + builder.add("words"); + } + if ((capsFlags & android.text.TextUtils.CAP_MODE_SENTENCES) != 0) { + builder.add("sentences"); + } + return builder.isEmpty() ? "none" : TextUtils.join("|", builder); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java b/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java index 21daddce7..a381649a4 100644 --- a/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java +++ b/java/src/com/android/inputmethod/latin/utils/RecapitalizeStatus.java @@ -51,6 +51,17 @@ public class RecapitalizeStatus { } } + public static String modeToString(final int recapitalizeMode) { + switch (recapitalizeMode) { + case NOT_A_RECAPITALIZE_MODE: return "undefined"; + case CAPS_MODE_ORIGINAL_MIXED_CASE: return "mixedCase"; + case CAPS_MODE_ALL_LOWER: return "allLower"; + case CAPS_MODE_FIRST_WORD_UPPER: return "firstWordUpper"; + case CAPS_MODE_ALL_UPPER: return "allUpper"; + default: return "unknown<" + recapitalizeMode + ">"; + } + } + /** * We store the location of the cursor and the string that was there before the recapitalize * action was done, and the location of the cursor and the string that was there after. diff --git a/native/dicttoolkit/Android.mk b/native/dicttoolkit/Android.mk new file mode 100644 index 000000000..118682dfc --- /dev/null +++ b/native/dicttoolkit/Android.mk @@ -0,0 +1,67 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq (,$(TARGET_BUILD_APPS)) + +# Only build if it's explicitly requested, or running mm/mmm. +ifneq ($(ONE_SHOT_MAKEFILE)$(filter $(MAKECMDGOALS),dicttoolkit),) + +# HACK: Temporarily disable host tool build on Mac until the build system is ready for C++11. +LATINIME_HOST_OSNAME := $(shell uname -s) +ifneq ($(LATINIME_HOST_OSNAME), Darwin) # TODO: Remove this + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni + +LATIN_IME_DICT_TOOLKIT_SRC_DIR := src +LATIN_IME_CORE_SRC_DIR := ../jni/src + +LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \ + -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls \ + -Woverloaded-virtual -Wsign-promo -Wno-system-headers + +# To suppress compiler warnings for unused variables/functions used for debug features etc. +LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function + +include $(LOCAL_PATH)/NativeFileList.mk +include $(LATIN_IME_CORE_PATH)/NativeFileList.mk + +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) + +LOCAL_SRC_FILES := $(LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES) \ + $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \ + $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES)) + +LOCAL_MODULE := dicttoolkit +LOCAL_MODULE_TAGS := optional + +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ + +include $(BUILD_HOST_EXECUTABLE) +#################### Clean up the tmp vars +include $(LOCAL_PATH)/CleanupNativeFileList.mk +#################### Unit test +include $(LOCAL_PATH)/UnitTests.mk + +endif # Darwin - TODO: Remove this + +endif + +endif # TARGET_BUILD_APPS diff --git a/native/dicttoolkit/CleanupNativeFileList.mk b/native/dicttoolkit/CleanupNativeFileList.mk new file mode 100644 index 000000000..b804b41ed --- /dev/null +++ b/native/dicttoolkit/CleanupNativeFileList.mk @@ -0,0 +1,17 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_TEST_FILES := diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk new file mode 100644 index 000000000..b6be9c541 --- /dev/null +++ b/native/dicttoolkit/NativeFileList.mk @@ -0,0 +1,21 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := \ + dict_toolkit_main.cpp + +LATIN_IME_DICT_TOOLKIT_SRC_FILES := + +LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ + dict_toolkit_defines_test.cpp diff --git a/native/dicttoolkit/UnitTests.mk b/native/dicttoolkit/UnitTests.mk new file mode 100644 index 000000000..d568db44a --- /dev/null +++ b/native/dicttoolkit/UnitTests.mk @@ -0,0 +1,68 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq (,$(TARGET_BUILD_APPS)) + +LOCAL_PATH := $(call my-dir) + +###################################### +include $(CLEAR_VARS) + +LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni + +LATIN_IME_DICT_TOOLKIT_SRC_DIR := src +LATIN_IME_CORE_SRC_DIR := ../jni/src +LATIN_DICT_TOOLKIT_TEST_SRC_DIR := tests + +include $(LOCAL_PATH)/NativeFileList.mk +include $(LATIN_IME_CORE_PATH)/NativeFileList.mk + +# TODO: Remove -std=c++11 once it is set by default on host build. +LATIN_IME_SRC_DIR := src +LOCAL_ADDRESS_SANITIZER := true +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) +LOCAL_MODULE := liblatinime_dicttoolkit_host_static_for_unittests +LOCAL_MODULE_TAGS := optional +LOCAL_SRC_FILES := \ + $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \ + $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES)) +include $(BUILD_HOST_STATIC_LIBRARY) + +include $(CLEAR_VARS) + +# TODO: Remove -std=c++11 once it is set by default on host build. +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) +LOCAL_MODULE := dicttoolkit_unittests +LOCAL_MODULE_TAGS := tests +LOCAL_SRC_FILES := \ + $(addprefix $(LATIN_DICT_TOOLKIT_TEST_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_TEST_FILES)) +LOCAL_STATIC_LIBRARIES += liblatinime_dicttoolkit_host_static_for_unittests +include $(BUILD_HOST_NATIVE_TEST) + +include $(LOCAL_PATH)/CleanupNativeFileList.mk + +#################### Clean up the tmp vars +LATINIME_HOST_OSNAME := +LATIN_IME_SRC_DIR := +LATIN_IME_TEST_SRC_DIR := + +endif # TARGET_BUILD_APPS diff --git a/native/dicttoolkit/dict_toolkit_main.cpp b/native/dicttoolkit/dict_toolkit_main.cpp new file mode 100644 index 000000000..d71b50eb4 --- /dev/null +++ b/native/dicttoolkit/dict_toolkit_main.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstdio> + +int main(int argc, char **argv) { + // TODO: Implement. + printf("%s\n", argv[0]); + return 0; +} diff --git a/native/dicttoolkit/run_tests.sh b/native/dicttoolkit/run_tests.sh new file mode 100755 index 000000000..44c99c144 --- /dev/null +++ b/native/dicttoolkit/run_tests.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2014, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# check script arguments +if [[ $(type -t mmm) != function ]]; then +if [[ ${BASH_SOURCE[0]} != $0 ]]; then return; else exit 1; fi +fi + +# Host build is never supported in unbundled (NDK/tapas) build +if [[ -n $TARGET_BUILD_APPS ]]; then + echo "Host build is never supported in tapas build." 1>&2 + echo "Use lunch command instead." 1>&2 + if [[ ${BASH_SOURCE[0]} != $0 ]]; then return; else exit 1; fi +fi + +test_name=dicttoolkit_unittests + +pushd $PWD > /dev/null +cd $(gettop) +(mmm -j16 packages/inputmethods/LatinIME/native/dicttoolkit) || (make -j16 $test_name) +$ANDROID_HOST_OUT/bin/$test_name +popd > /dev/null diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h new file mode 100644 index 000000000..2a2104e26 --- /dev/null +++ b/native/dicttoolkit/src/dict_toolkit_defines.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_DEFINES_H +#define LATINIME_DICT_TOOLKIT_DEFINES_H + +#include "defines.h" + +#endif // LATINIME_DICT_TOOLKIT_DEFINES_H diff --git a/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp new file mode 100644 index 000000000..3445bd0c5 --- /dev/null +++ b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dict_toolkit_defines.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +// Initial trivial test case. +TEST(DictToolkitDefinesTest, TestKeycodeSpace) { + EXPECT_EQ(' ', KEYCODE_SPACE); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp index 1e2494e92..8f07ce275 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp @@ -31,6 +31,7 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100; const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_A_PERFECT_MATCH = NOT_AN_ERROR; const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h index fd1d5fcff..e92c509fa 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.h +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -52,6 +52,10 @@ class ErrorTypeUtils { return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; } + static bool isPerfectMatch(const ErrorType containedErrorTypes) { + return (containedErrorTypes & ~ERRORS_TREATED_AS_A_PERFECT_MATCH) == 0; + } + static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) { return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0; @@ -73,6 +77,7 @@ class ErrorTypeUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; + static const ErrorType ERRORS_TREATED_AS_A_PERFECT_MATCH; static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h index ce3684a1c..b9dda83ad 100644 --- a/native/jni/src/suggest/core/policy/scoring.h +++ b/native/jni/src/suggest/core/policy/scoring.h @@ -30,7 +30,7 @@ class Scoring { public: virtual int calculateFinalScore(const float compoundDistance, const int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit, - const bool boostExactMatches) const = 0; + const bool boostExactMatches, const bool hasProbabilityZero) const = 0; virtual void getMostProbableString(const DicTraverseSession *const traverseSession, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const = 0; diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index 23103b9f7..74db95953 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -161,7 +161,7 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; compoundDistance, traverseSession->getInputSize(), terminalDicNode->getContainedErrorTypes(), (forceCommitMultiWords && terminalDicNode->hasMultipleWords()), - boostExactMatches); + boostExactMatches, wordAttributes.getProbability() == 0); // Don't output invalid or blocked offensive words. However, we still need to submit their // shortcuts if any. diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index a6f9a8b23..856808a74 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -24,6 +24,7 @@ const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120; const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f; const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f; +const float ScoringParams::PERFECT_MATCH_PROMOTION = 1.1f; const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f; const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f; const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f; diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index b8f889559..6f327a370 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -34,6 +34,7 @@ class ScoringParams { static const int THRESHOLD_SHORT_WORD_LENGTH; static const float EXACT_MATCH_PROMOTION; + static const float PERFECT_MATCH_PROMOTION; static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH; static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h index 0240bcf54..6acd767ea 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h @@ -44,23 +44,50 @@ class TypingScoring : public Scoring { AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit, - const bool boostExactMatches) const { + const bool boostExactMatches, const bool hasProbabilityZero) const { const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT; float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance; if (forceCommit) { score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD; } - if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { - score += ScoringParams::EXACT_MATCH_PROMOTION; - if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) { - score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; + if (hasProbabilityZero) { + // Previously, when both legitimate 0-frequency words (such as distracters) and + // offensive words were encoded in the same way, distracters would never show up + // when the user blocked offensive words (the default setting, as well as the + // setting for regression tests). + // + // When b/11031090 was fixed and a separate encoding was used for offensive words, + // 0-frequency words would no longer be blocked when they were an "exact match" + // (where case mismatches and accent mismatches would be considered an "exact + // match"). The exact match boosting functionality meant that, for example, when + // the user typed "mt" they would be suggested the word "Mt", although they most + // probably meant to type "my". + // + // For this reason, we introduced this change, which does the following: + // * Defines the "perfect match" as a really exact match, with no room for case or + // accent mismatches + // * When the target word has probability zero (as "Mt" does, because it is a + // distracter), ONLY boost its score if it is a perfect match. + // + // By doing this, when the user types "mt", the word "Mt" will NOT be boosted, and + // they will get "my". However, if the user makes an explicit effort to type "Mt", + // we do boost the word "Mt" so that the user's input is not autocorrected to "My". + if (boostExactMatches && ErrorTypeUtils::isPerfectMatch(containedErrorTypes)) { + score += ScoringParams::PERFECT_MATCH_PROMOTION; } - if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) { - score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; - } - if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) { - score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH; + } else { + if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { + score += ScoringParams::EXACT_MATCH_PROMOTION; + if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) { + score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; + } + if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) { + score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; + } + if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) { + score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH; + } } } return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE); diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index 13d7c37af..42659253a 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -42,8 +42,6 @@ LATINIME_TESTS_SRC_DIR := $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmetho # a significant part of the dependencies are mocked in the compat/ directory, with empty or # nearly-empty implementations, for parts that we don't use in Dicttool. LATINIME_SRC_FILES_FOR_DICTTOOL := \ - event/Combiner.java \ - event/Event.java \ latin/BinaryDictionary.java \ latin/DicTraverseSession.java \ latin/Dictionary.java \ diff --git a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java b/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java deleted file mode 100644 index c4457a1b7..000000000 --- a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.event; - -import java.util.ArrayList; - -/** - * Compatibility class that stands in for the combiner chain in LatinIME. - * - * This is not used by dicttool, it's just needed by the dependency chain. - */ -// TODO: there should not be a dependency to this in dicttool, so there -// should be a sensible way to separate them cleanly. -public class CombinerChain { - private StringBuilder mComposingWord; - public CombinerChain(final String initialText, final Combiner... combinerList) { - mComposingWord = new StringBuilder(initialText); - } - - public Event processEvent(final ArrayList<Event> previousEvents, final Event newEvent) { - return newEvent; - } - - public void applyProcessedEvent(final Event event) { - mComposingWord.append(event.getTextToCommit()); - } - - public CharSequence getComposingWordWithCombiningFeedback() { - return mComposingWord; - } - - public void reset() { - mComposingWord.setLength(0); - } - - public static Combiner[] createCombiners(final String spec) { - // Dicttool never uses a combiner at all, so we just return a zero-sized array. - return new Combiner[0]; - } -} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java index 0d93c7fa9..07450ca51 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin.dicttool; public class CommandList { public static void populate() { + // TODO: Move some commands to native code. Dicttool.addCommand("info", Info.class); Dicttool.addCommand("diff", Diff.class); Dicttool.addCommand("compress", Compress.Compressor.class); |