26 files changed, 211 insertions, 75 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a61..898b44f44 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -80,32 +80,38 @@ int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, c
     return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
 }
 
-void Dictionary::addUnigramEntry(const int *const word, const int length,
+bool Dictionary::addUnigramEntry(const int *const word, const int length,
         const UnigramProperty *const unigramProperty) {
+    if (unigramProperty->representsBeginningOfSentence()
+            && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+                    ->supportsBeginningOfSentence()) {
+        AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+        return false;
+    }
     TimeKeeper::setCurrentTime();
-    mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
+    return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
 }
 
-void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
         const BigramProperty *const bigramProperty) {
     TimeKeeper::setCurrentTime();
-    mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
+    return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
 }
 
-void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
         const int *const word, const int length) {
     TimeKeeper::setCurrentTime();
-    mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
+    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
 }
 
-void Dictionary::flush(const char *const filePath) {
+bool Dictionary::flush(const char *const filePath) {
     TimeKeeper::setCurrentTime();
-    mDictionaryStructureWithBufferPolicy->flush(filePath);
+    return mDictionaryStructureWithBufferPolicy->flush(filePath);
 }
 
-void Dictionary::flushWithGC(const char *const filePath) {
+bool Dictionary::flushWithGC(const char *const filePath) {
     TimeKeeper::setCurrentTime();
-    mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+    return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
 }
 
 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 817d9f7fc..f6d406fbd 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -57,6 +57,7 @@ class Dictionary {
     static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
     static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
     static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
+    static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
 
     Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
             dictionaryStructureWithBufferPolicy);
@@ -75,18 +76,18 @@ class Dictionary {
     int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
             const int *word, int length) const;
 
-    void addUnigramEntry(const int *const codePoints, const int codePointCount,
+    bool addUnigramEntry(const int *const codePoints, const int codePointCount,
             const UnigramProperty *const unigramProperty);
 
-    void addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const BigramProperty *const bigramProperty);
 
-    void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
             const int length);
 
-    void flush(const char *const filePath);
+    bool flush(const char *const filePath);
 
-    void flushWithGC(const char *const filePath);
+    bool flushWithGC(const char *const filePath);
 
     bool needsToRunGC(const bool mindsBlockByGC);
 
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index 0635fef7e..b6bf7a98c 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -31,4 +31,8 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
 const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
         NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
 
+const ErrorTypeUtils::ErrorType
+        ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
+                ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION;
+
 } // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index 0e8e5b635..e3e76b238 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -51,6 +51,14 @@ class ErrorTypeUtils {
         return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
     }
 
+    // Returns true when containedErrorTypes has no bit set outside of
+    // ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION.
+    static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
+        const bool hasOtherErrorTypes = (containedErrorTypes
+                & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) != 0;
+        return !hasOtherErrorTypes;
+    }
+
     static bool isEditCorrectionError(const ErrorType errorType) {
         return (errorType & EDIT_CORRECTION) != 0;
     }
@@ -67,6 +75,7 @@ class ErrorTypeUtils {
     DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
 
     static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+    static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
 };
 } // namespace latinime
 #endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d2551057b..902eb000f 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@ class UnigramProperty {
     };
 
     UnigramProperty()
-            : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
-              mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
-
-    UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
-            const int timestamp, const int level, const int count,
-            const std::vector<ShortcutProperty> *const shortcuts)
-            : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+            : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+              mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+              mShortcuts() {}
+
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const int timestamp, const int level,
+            const int count, const std::vector<ShortcutProperty> *const shortcuts)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
               mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
 
+    bool representsBeginningOfSentence() const {
+        return mRepresentsBeginningOfSentence;
+    }
+
     bool isNotAWord() const {
         return mIsNotAWord;
     }
@@ -94,6 +100,7 @@ class UnigramProperty {
     DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
 
     // TODO: Make members const.
+    bool mRepresentsBeginningOfSentence;
     bool mIsNotAWord;
     bool mIsBlacklisted;
     int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629e6..a61227626 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy {
 
     virtual const std::vector<int> *getLocale() const = 0;
 
+    virtual bool supportsBeginningOfSentence() const = 0;
+
  protected:
     DictionaryHeaderStructurePolicy() {}
 
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 3fd815f98..cda89406c 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -81,9 +81,11 @@ class DictionaryStructureWithBufferPolicy {
     virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const int *const word, const int length) = 0;
 
-    virtual void flush(const char *const filePath) = 0;
+    // Returns whether the flush succeeded.
+    virtual bool flush(const char *const filePath) = 0;
 
-    virtual void flushWithGC(const char *const filePath) = 0;
+    // Returns whether the GC and flush succeeded.
+    virtual bool flushWithGC(const char *const filePath) = 0;
 
     virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
 
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index a307cb45d..23908255b 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -89,6 +89,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
                     terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
     const bool isExactMatch =
             ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+    const bool isExactMatchWithIntentionalOmission =
+            ErrorTypeUtils::isExactMatchWithIntentionalOmission(
+                    terminalDicNode->getContainedErrorTypes());
     const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
     // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
     // (e.g. "AMD" and "and")
@@ -96,7 +99,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
             && !(isPossiblyOffensiveWord && isFirstCharUppercase);
     const int outputTypeFlags =
             (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
-            | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
+            | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
+            | (isExactMatchWithIntentionalOmission ?
+                    Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
 
     // Entries that are blacklisted or do not represent a word should not be output.
     const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index e4de1f4cc..56c53c1c2 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -20,11 +20,11 @@
 #include "defines.h"
 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
 
 namespace latinime {
 
 // TODO: Support n-gram.
-// TODO: Support beginning of sentence.
 // This class does not take ownership of any code point buffers.
 class PrevWordsInfo {
  public:
@@ -52,8 +52,7 @@ class PrevWordsInfo {
 
     void getPrevWordsTerminalPtNodePos(
             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
-            int *const outPrevWordsTerminalPtNodePos,
-            const bool tryLowerCaseSearch) const {
+            int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
         for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
             outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
                     mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
@@ -63,17 +62,11 @@ class PrevWordsInfo {
 
     BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
-        int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
-                mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
-        // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
-        // dictionary or has no bigrams
-        if (NOT_A_DICT_POS == pos) {
-            // If no bigrams for this exact word, search again in lower case.
-            pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
-                    mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
-        }
-        return BinaryDictionaryBigramsIterator(
-                dictStructurePolicy->getBigramsStructurePolicy(), pos);
+        const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
+                dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
+                mIsBeginningOfSentence[0]);
+        return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
+                bigramListPos);
     }
 
     // n is 1-indexed.
@@ -99,11 +92,24 @@ class PrevWordsInfo {
             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
             const int *const wordCodePoints, const int wordCodePointCount,
             const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
-        if (!dictStructurePolicy || !wordCodePoints) {
+        // wordCodePointCount becomes the memmove size below, so it must lie in
+        // [0, MAX_WORD_LENGTH]; a negative value would wrap to a huge size_t.
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
+                || wordCodePointCount > MAX_WORD_LENGTH) {
             return NOT_A_DICT_POS;
         }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
         const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
-                wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
+                codePoints, codePointCount, false /* forceLowerCaseSearch */);
         if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
             // Return the position when when the word was found or doesn't try lower case
             // search.
@@ -112,7 +118,44 @@ class PrevWordsInfo {
         // Check bigrams for lower-cased previous word if original was not found. Useful for
         // auto-capitalized words like "The [current_word]".
         return dictStructurePolicy->getTerminalPtNodePositionOfWord(
-                wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
+                codePoints, codePointCount, true /* forceLowerCaseSearch */);
+    }
+
+    // Returns the bigram list position of the given word, or NOT_A_DICT_POS when an
+    // argument is invalid, the beginning-of-sentence marker cannot be attached, or
+    // neither lookup finds a bigram list. The word is looked up as given first and,
+    // only if that fails, once more with forceLowerCaseSearch.
+    static int getBigramListPositionForWordWithTryingLowerCaseSearch(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence) {
+        // wordCodePointCount becomes the memmove size below, so it must lie in
+        // [0, MAX_WORD_LENGTH]; a negative value would wrap to a huge size_t.
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
+                || wordCodePointCount > MAX_WORD_LENGTH) {
+            return NOT_A_DICT_POS;
+        }
+        // The marker is attached to this local copy, never to the caller's buffer.
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                codePointCount, false /* forceLowerCaseSearch */);
+        // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
+        // dictionary or has no bigrams
+        if (NOT_A_DICT_POS == pos) {
+            // If no bigrams for this exact word, search again in lower case.
+            pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                    codePointCount, true /* forceLowerCaseSearch */);
+        }
+        return pos;
     }
 
     static int getBigramListPositionForWord(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d15164..75f4fef90 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
         switch (mDictFormatVersion) {
             case FormatUtils::VERSION_2:
                 return FormatUtils::VERSION_2;
+            case FormatUtils::VERSION_401:
+                return FormatUtils::VERSION_401;
             case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                 return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
             case FormatUtils::VERSION_4:
@@ -246,6 +248,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
         return &mLocale;
     }
 
+    bool supportsBeginningOfSentence() const {
+        return mDictFormatVersion > FormatUtils::VERSION_401;
+    }
+
  private:
     DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index a8f8f284b..b13ad1879 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -98,6 +98,7 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
         case FormatUtils::VERSION_2:
             // Version 2 dictionary writing is not supported.
             return false;
+        case FormatUtils::VERSION_401:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
index 97e1120a3..0f60a898d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
@@ -296,26 +296,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
     }
 }
 
-void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
     if (!mBuffers->isUpdatable()) {
         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
-        return;
+        return false;
     }
     if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
         AKLOGE("Cannot flush the dictionary to file.");
         mIsCorrupted = true;
+        return false;
     }
+    return true;
 }
 
-void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
     if (!mBuffers->isUpdatable()) {
         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
-        return;
+        return false;
     }
     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
         AKLOGE("Cannot flush the dictionary to file with GC.");
         mIsCorrupted = true;
+        return false;
     }
+    return true;
 }
 
 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -432,8 +436,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h
index 95813881d..b064aaf33 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h
@@ -117,9 +117,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
             const int length);
 
-    void flush(const char *const filePath);
+    bool flush(const char *const filePath);
 
-    void flushWithGC(const char *const filePath);
+    bool flushWithGC(const char *const filePath);
 
     bool needsToRunGC(const bool mindsBlockByGC) const;
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index f93d2894c..93e330a2a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -57,13 +57,14 @@ namespace latinime {
                 const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
     FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
     switch (dictFormatVersion) {
-        case FormatUtils::VERSION_4: {
+        case FormatUtils::VERSION_401: {
             return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
                     backward::v401::Ver4PatriciaTriePolicy>(
                             dictFormatVersion, locale, attributeMap);
         }
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV: {
             return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -115,13 +116,14 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
         case FormatUtils::VERSION_2:
             AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
             break;
-        case FormatUtils::VERSION_4: {
+        case FormatUtils::VERSION_401: {
             return newPolicyForV4Dict<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
                     backward::v401::Ver4PatriciaTriePolicy>(
                             headerFilePath, formatVersion, std::move(mmappedBuffer));
         }
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV: {
             return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
@@ -177,6 +179,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
         case FormatUtils::VERSION_2:
             return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                     new PatriciaTriePolicy(std::move(mmappedBuffer)));
+        case FormatUtils::VERSION_401:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_DEV:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
index 028e9ecbf..1f00fc6ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners
         }
     } else {
         mValueStack.back() += 1;
-        if (ptNodeParams->isTerminal()) {
+        if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
             mValidUnigramCount += 1;
         }
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e90..b2e60a837 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,16 @@ class PtNodeParams {
     }
 
+    // True when this PtNode has code points, its first code point is not in the Unicode
+    // space (per CharUtils::isInUnicodeSpace) and the node is flagged as not-a-word.
     AK_FORCE_INLINE bool representsNonWordInfo() const {
-        return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+        return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+                && isNotAWord();
+    }
+
+    // True when this PtNode has code points, its first code point is
+    // CODE_POINT_BEGINNING_OF_SENTENCE and the node is flagged as not-a-word.
+    AK_FORCE_INLINE bool representsBeginningOfSentence() const {
+        return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
                 && isNotAWord();
     }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba37..a6a470c4e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
             shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
 }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 6240d46aa..88bbfd966 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -102,14 +102,16 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
         return false;
     }
 
-    void flush(const char *const filePath) {
+    bool flush(const char *const filePath) {
         // This method should not be called for non-updatable dictionary.
         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+        return false;
     }
 
-    void flushWithGC(const char *const filePath) {
+    bool flushWithGC(const char *const filePath) {
         // This method should not be called for non-updatable dictionary.
         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
     }
 
     bool needsToRunGC(const bool mindsBlockByGC) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index d53922763..e1ceaee49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -23,9 +23,11 @@ namespace latinime {
 const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
         int *const bigramEntryPos) const {
     const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
-    if (*bigramEntryPos < 0 || *bigramEntryPos >=  bigramListBuffer->getTailPosition()) {
-        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d",
-                *bigramEntryPos, bigramListBuffer->getTailPosition());
+    const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
+    if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
+        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+                "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
+                        bigramListBuffer->getTailPosition());
         ASSERT(false);
         return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                 Ver4DictConstants::NOT_A_TERMINAL_ID);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index b8bdb63a8..52447a336 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -99,6 +99,20 @@ class BigramDictContent : public SparseTableDictContent {
         return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
     }
 
+    // Size of a single bigram entry, computed as the sum of its field sizes.
+    int getBigramEntrySize() const {
+        // The flags and target terminal id fields are counted in both layouts.
+        const int sharedFieldsSize = Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+                + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+        if (!mHasHistoricalInfo) {
+            return sharedFieldsSize + Ver4DictConstants::PROBABILITY_SIZE;
+        }
+        return sharedFieldsSize
+                + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+    }
+
     bool runGCBigramList(const int bigramListPos,
             const BigramDictContent *const sourceBigramDictContent, const int toPos,
             const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e44..09c7b7d85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
         }
         readingHelper.readNextSiblingNode(ptNodeParams);
-        if (!ptNodeParams.representsNonWordInfo()) {
+        if (ptNodeParams.representsNonWordInfo()) {
             // Skip PtNodes that represent non-word information.
             continue;
         }
@@ -181,9 +181,19 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     readingHelper.initWithPtNodeArrayPos(getRootPosition());
     bool addedNewUnigram = false;
-    if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
             unigramProperty, &addedNewUnigram)) {
-        if (addedNewUnigram) {
+        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
             mUnigramCount++;
         }
         if (unigramProperty->getShortcuts().size() > 0) {
@@ -294,26 +304,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
     }
 }
 
-void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
     if (!mBuffers->isUpdatable()) {
         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
-        return;
+        return false;  // Returns before writeToDictFile() is ever called.
     }
     if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
         AKLOGE("Cannot flush the dictionary to file.");
         mIsCorrupted = true;
+        return false;  // The write failed; mIsCorrupted was set above.
     }
+    return true;  // writeToDictFile() reported success.
 }
 
-void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
     if (!mBuffers->isUpdatable()) {
         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
-        return;
+        return false;  // Returns before writeToDictFileWithGC() is ever called.
     }
     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
         AKLOGE("Cannot flush the dictionary to file with GC.");
         mIsCorrupted = true;
+        return false;  // The write failed; mIsCorrupted was set above.
     }
+    return true;  // writeToDictFileWithGC() reported success.
 }
 
 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -430,8 +444,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
             shortcuts.emplace_back(&target, shortcutProbability);
         }
     }
-    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
-            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
             historicalInfo->getCount(), &shortcuts);
     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 008f2e423..d198c97fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -99,9 +99,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
             const int length1);
 
-    void flush(const char *const filePath);
+    bool flush(const char *const filePath);
 
-    void flushWithGC(const char *const filePath);
+    bool flushWithGC(const char *const filePath);
 
     bool needsToRunGC(const bool mindsBlockByGC) const;
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 105363db5..a04551a44 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -41,11 +41,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
     TimeKeeper::setCurrentTime();
     const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
     switch (formatVersion) {
-        case FormatUtils::VERSION_4:
+        case FormatUtils::VERSION_401:
             return createEmptyV4DictFile<backward::v401::Ver4DictConstants,
                     backward::v401::Ver4DictBuffers,
                     backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>(
                             filePath, localeAsCodePointVector, attributeMap, formatVersion);
+        case FormatUtils::VERSION_4:
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4_DEV:
             return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index ba405b07e..18f558094 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -29,6 +29,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
     switch (formatVersion) {
         case VERSION_2:
             return VERSION_2;
+        case VERSION_401:
+            return VERSION_401;
         case VERSION_4_ONLY_FOR_TESTING:
             return VERSION_4_ONLY_FOR_TESTING;
         case VERSION_4:
@@ -60,6 +62,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
             // same so we use them for both here.
             if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
                 return VERSION_2;
+            } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_401) {
+                return VERSION_401;
             } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
                 return VERSION_4_ONLY_FOR_TESTING;
             } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index c47f30ca4..b05cb2fc8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,8 +32,9 @@ class FormatUtils {
         // These MUST have the same values as the relevant constants in FormatSpec.java.
         VERSION_2 = 2,
         VERSION_4_ONLY_FOR_TESTING = 399,
-        VERSION_4 = 401,
-        VERSION_4_DEV = 402,
+        VERSION_401 = 401,
+        VERSION_4 = 402,
+        VERSION_4_DEV = 403,
         UNKNOWN_VERSION = -1
     };
 
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 634c45b04..f28ed5682 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -18,6 +18,7 @@
 #define LATINIME_CHAR_UTILS_H
 
 #include <cctype>
+#include <cstring>
 #include <vector>
 
 #include "defines.h"
@@ -93,6 +94,19 @@ class CharUtils {
     static unsigned short latin_tolower(const unsigned short c);
     static const std::vector<int> EMPTY_STRING;
 
+    // Prepends the Beginning-of-Sentence marker in place. Returns the updated code point count,
+    // or 0 (codePoints untouched) when the count is negative or leaves no room for the marker.
+    static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+            const int codePointCount, const int maxCodePoint) {
+        if (codePointCount < 0 || codePointCount >= maxCodePoint) {
+            // A negative count would wrap to a huge size_t in the memmove() size below.
+            return 0;
+        }
+        memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
+        codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+        return codePointCount + 1;
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);