aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/dictionary/structure/v4/content/probability_entry.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/dictionary/structure/v4/content/probability_entry.h')
-rw-r--r--native/jni/src/dictionary/structure/v4/content/probability_entry.h176
1 files changed, 176 insertions, 0 deletions
diff --git a/native/jni/src/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/dictionary/structure/v4/content/probability_entry.h
new file mode 100644
index 000000000..473354b90
--- /dev/null
+++ b/native/jni/src/dictionary/structure/v4/content/probability_entry.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_ENTRY_H
+#define LATINIME_PROBABILITY_ENTRY_H
+
+#include <climits>
+#include <cstdint>
+
+#include "defines.h"
+#include "dictionary/property/historical_info.h"
+#include "dictionary/property/ngram_property.h"
+#include "dictionary/property/unigram_property.h"
+#include "dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class ProbabilityEntry {
+ public:
+ ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
+ : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
+ mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
+
+ // Dummy entry
+ ProbabilityEntry()
+ : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
+ mHistoricalInfo() {}
+
+ // Entry without historical information
+ ProbabilityEntry(const int flags, const int probability)
+ : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+ // Entry with historical information.
+ ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
+ : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
+
+ // Create from unigram property.
+ ProbabilityEntry(const UnigramProperty *const unigramProperty)
+ : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ unigramProperty->isPossiblyOffensive())),
+ mProbability(unigramProperty->getProbability()),
+ mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
+
+ // Create from ngram property.
+ // TODO: Set flags.
+ ProbabilityEntry(const NgramProperty *const ngramProperty)
+ : mFlags(0), mProbability(ngramProperty->getProbability()),
+ mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
+
+ bool isValid() const {
+ return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
+ }
+
+ bool hasHistoricalInfo() const {
+ return mHistoricalInfo.isValid();
+ }
+
+ uint8_t getFlags() const {
+ return mFlags;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ const HistoricalInfo *getHistoricalInfo() const {
+ return &mHistoricalInfo;
+ }
+
+ bool representsBeginningOfSentence() const {
+ return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
+ }
+
+ bool isNotAWord() const {
+ return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
+ }
+
+ bool isBlacklisted() const {
+ return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
+ }
+
+ bool isPossiblyOffensive() const {
+ return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
+ }
+
+ uint64_t encode(const bool hasHistoricalInfo) const {
+ uint64_t encodedEntry = static_cast<uint8_t>(mFlags);
+ if (hasHistoricalInfo) {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
+ | static_cast<uint32_t>(mHistoricalInfo.getTimestamp());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
+ | static_cast<uint8_t>(mHistoricalInfo.getLevel());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
+ | static_cast<uint16_t>(mHistoricalInfo.getCount());
+ } else {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
+ | static_cast<uint8_t>(mProbability);
+ }
+ return encodedEntry;
+ }
+
+ static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
+ if (hasHistoricalInfo) {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int timestamp = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int level = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int count = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return ProbabilityEntry(flags, &historicalInfo);
+ } else {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
+ Ver4DictConstants::PROBABILITY_SIZE);
+ const int probability = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
+ return ProbabilityEntry(flags, probability);
+ }
+ }
+
+ private:
+ // Copy constructor is public to use this class as a type of return value.
+ DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+ const uint8_t mFlags;
+ const int mProbability;
+ const HistoricalInfo mHistoricalInfo;
+
+ static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
+ return static_cast<int>(
+ (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
+ }
+
+ static uint8_t createFlags(const bool representsBeginningOfSentence,
+ const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
+ uint8_t flags = 0;
+ if (representsBeginningOfSentence) {
+ flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
+ }
+ if (isNotAWord) {
+ flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
+ }
+ if (isBlacklisted) {
+ flags |= Ver4DictConstants::FLAG_BLACKLISTED;
+ }
+ if (isPossiblyOffensive) {
+ flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
+ }
+ return flags;
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_ENTRY_H */