1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "binary_format.h"
#include "defines.h"
#include "digraph_utils.h"
namespace latinime {
const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
{ { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
{ 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
{ 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
{ { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
{ 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
{ DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
const int dictFlags, const int compositeGlyphCodePoint) {
const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
return true;
}
return false;
}
// Returns the digraph type associated with the given dictionary.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
const int dictFlags) {
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) {
return DIGRAPH_TYPE_FRENCH_LIGATURES;
}
return DIGRAPH_TYPE_NONE;
}
// Retrieves the set of all digraphs associated with the given dictionary flags.
// Returns the size of the digraph array, or 0 if none exist.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
const int dictFlags, const DigraphUtils::digraph_t **const digraphs) {
const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
return getAllDigraphsForDigraphTypeAndReturnSize(digraphType, digraphs);
}
// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
const DigraphCodePointIndex digraphCodePointIndex) {
if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
return NOT_A_CODE_POINT;
}
const DigraphUtils::digraph_t *const digraph =
DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
if (!digraph) {
return NOT_A_CODE_POINT;
}
if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
return digraph->first;
} else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
return digraph->second;
}
ASSERT(false);
return NOT_A_CODE_POINT;
}
// Retrieves the set of all digraphs associated with the given digraph type.
// Returns the size of the digraph array, or 0 if none exist.
/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
const DigraphUtils::DigraphType digraphType,
const DigraphUtils::digraph_t **const digraphs) {
if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) {
*digraphs = GERMAN_UMLAUT_DIGRAPHS;
return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
}
if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
*digraphs = FRENCH_LIGATURES_DIGRAPHS;
return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
}
return 0;
}
/**
* Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
* compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
*/
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
const int compositeGlyphCodePoint) {
for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
if (digraph) {
return digraph;
}
}
return 0;
}
/**
* Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
* digraphType: the type of digraphs supported.
* compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
*/
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
const DigraphUtils::digraph_t *digraphs = 0;
const int digraphsSize =
DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
for (int i = 0; i < digraphsSize; i++) {
if (digraphs[i].compositeGlyph == compositeGlyphCodePoint) {
return &digraphs[i];
}
}
return 0;
}
} // namespace latinime
|