1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kelar.inputmethod.latin.makedict;
import org.kelar.inputmethod.annotations.UsedForTesting;
import org.kelar.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import org.kelar.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.TreeMap;
/**
 * An interface of binary dictionary decoders.
 *
 * <p>Besides the decoding operations themselves, this interface hosts the
 * {@link DictionaryBufferFactory} strategy and its three stock implementations, which differ in
 * how the dictionary file is brought into memory: a read-only mapping, a heap byte array, or a
 * writable mapping.
 */
// TODO: Straighten out responsibility for the buffer's file pointer.
public interface DictDecoder {
    /**
     * Reads and returns the file header.
     *
     * @return the header of the dictionary file.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException;

    /**
     * Reads PtNode from ptNodePos.
     *
     * @param ptNodePos the position of PtNode.
     * @return PtNodeInfo.
     */
    public PtNodeInfo readPtNode(final int ptNodePos);

    /**
     * Reads a buffer and returns the memory representation of the dictionary.
     *
     * This high-level method takes a buffer and reads its contents, populating a
     * FusionDictionary structure.
     *
     * @param deleteDictIfBroken a flag indicating whether this method should remove the broken
     * dictionary or not.
     * @return the created dictionary.
     * @throws FileNotFoundException if the dictionary file can't be found.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
            throws FileNotFoundException, IOException, UnsupportedFormatException;

    /**
     * Gets the address of the last PtNode of the exact matching word in the dictionary.
     * If no match is found, returns NOT_VALID_WORD.
     *
     * @param word the word we search for.
     * @return the address of the terminal node.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public int getTerminalPosition(final String word)
            throws IOException, UnsupportedFormatException;

    /**
     * Reads unigrams and bigrams from the binary file.
     * Doesn't store a full memory representation of the dictionary.
     *
     * All three maps are output parameters that the implementation fills in.
     *
     * @param words the map to store the address as a key and the word as a value.
     * @param frequencies the map to store the address as a key and the frequency as a value.
     * @param bigrams the map to store the address as a key and the list of address as a value.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            throws IOException, UnsupportedFormatException;

    /**
     * Sets the position of the buffer to the given value.
     *
     * @param newPos the new position
     */
    public void setPosition(final int newPos);

    /**
     * Gets the position of the buffer.
     *
     * @return the position
     */
    public int getPosition();

    /**
     * Reads and returns the PtNode count out of a buffer and forwards the pointer.
     *
     * @return the PtNode count read at the current position.
     */
    public int readPtNodeCount();

    /**
     * Opens the dictionary file and makes DictBuffer.
     *
     * @throws FileNotFoundException if the dictionary file can't be found.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public void openDictBuffer() throws FileNotFoundException, IOException,
            UnsupportedFormatException;

    /**
     * @return whether the DictBuffer is open (presumably after a successful
     * {@link #openDictBuffer()}; the exact definition is up to the implementation).
     */
    @UsedForTesting
    public boolean isDictBufferOpen();

    // Constants for DictionaryBufferFactory.
    // NOTE(review): these look like selector flags, one per stock factory below, with
    // MASK_DICTBUFFER covering the bits they occupy; the code consuming them is not in this
    // file, so confirm against the callers.
    public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    public static final int USE_BYTEARRAY = 0x02000000;
    public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000;
    public static final int MASK_DICTBUFFER = 0x0F000000;

    /**
     * A strategy for turning a dictionary file into a {@link DictBuffer}.
     */
    public interface DictionaryBufferFactory {
        /**
         * Creates a buffer giving access to the contents of the passed file.
         *
         * @param file the dictionary file.
         * @return the buffer over the contents of the file.
         * @throws FileNotFoundException if the file can't be opened.
         * @throws IOException if the file can't be read or mapped.
         */
        public DictBuffer getDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException;
    }

    /**
     * Creates DictionaryBuffer using a ByteBuffer
     *
     * This class uses less memory than DictionaryBufferFromByteArrayFactory,
     * but doesn't perform as fast.
     * When operating on a big dictionary, this class is preferred.
     */
    public static final class DictionaryBufferFromReadOnlyByteBufferFactory
            implements DictionaryBufferFactory {
        /**
         * Maps the whole file read-only and wraps the mapping in a DictBuffer.
         *
         * @param file the dictionary file.
         * @return a buffer backed by a read-only mapping of the file; never null.
         * @throws FileNotFoundException if the file can't be opened for reading.
         * @throws IOException if mapping the file or closing the stream fails.
         */
        @Override
        public DictBuffer getDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            final FileInputStream inStream = new FileInputStream(file);
            try {
                // map() either returns a buffer or throws, so no null check is needed.
                final ByteBuffer buffer = inStream.getChannel().map(
                        FileChannel.MapMode.READ_ONLY, 0, file.length());
                return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
            } finally {
                // A mapping stays valid after its channel is closed, so the stream can be
                // released right away.
                inStream.close();
            }
        }
    }

    /**
     * Creates DictionaryBuffer using a byte array
     *
     * This class performs faster than other classes, but consumes more memory.
     * When operating on a small dictionary, this class is preferred.
     */
    public static final class DictionaryBufferFromByteArrayFactory
            implements DictionaryBufferFactory {
        /**
         * Reads the whole file into a freshly allocated byte array and wraps it in a DictBuffer.
         *
         * @param file the dictionary file.
         * @return a buffer backed by a byte array holding the full contents of the file.
         * @throws FileNotFoundException if the file can't be opened for reading.
         * @throws IOException if the file is larger than {@link Integer#MAX_VALUE} bytes, ends
         * before the expected number of bytes has been read, or can't be read.
         */
        @Override
        public DictBuffer getDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            final FileInputStream inStream = new FileInputStream(file);
            try {
                final long fileLength = file.length();
                // A blind (int) cast would wrap around for huge files and yield either a
                // negative array size or a silently truncated dictionary.
                if (fileLength > Integer.MAX_VALUE) {
                    throw new IOException("Dictionary file is too large for a byte array: "
                            + file + " (" + fileLength + " bytes)");
                }
                final byte[] array = new byte[(int) fileLength];
                // A single read() call may return fewer bytes than requested, so keep reading
                // until the array is full.
                int filled = 0;
                while (filled < array.length) {
                    final int count = inStream.read(array, filled, array.length - filled);
                    if (count < 0) {
                        throw new IOException("Unexpected end of dictionary file " + file
                                + ": read " + filled + " of " + array.length + " bytes");
                    }
                    filled += count;
                }
                return new ByteArrayDictBuffer(array);
            } finally {
                inStream.close();
            }
        }
    }

    /**
     * Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
     *
     * This class doesn't perform as fast as other classes,
     * but this class is the only option available for destructive operations (insert or delete)
     * on a dictionary.
     */
    @UsedForTesting
    public static final class DictionaryBufferFromWritableByteBufferFactory
            implements DictionaryBufferFactory {
        /**
         * Maps the whole file read-write and wraps the mapping in a DictBuffer.
         *
         * Note that the file is opened in "rw" mode, which attempts to create the file when it
         * does not exist yet.
         *
         * @param file the dictionary file.
         * @return a buffer backed by a writable mapping of the file; never null.
         * @throws FileNotFoundException if the file can't be opened or created for writing.
         * @throws IOException if mapping the file or closing it fails.
         */
        @Override
        public DictBuffer getDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            final RandomAccessFile raFile = new RandomAccessFile(file, "rw");
            try {
                // map() either returns a buffer or throws, so no null check is needed.
                final ByteBuffer buffer = raFile.getChannel().map(
                        FileChannel.MapMode.READ_WRITE, 0, file.length());
                return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
            } finally {
                // A mapping stays valid after its channel is closed, so the file handle can be
                // released right away.
                raFile.close();
            }
        }
    }

    /**
     * @return whether this decoder has a valid binary dictionary that it can decode.
     */
    public boolean hasValidRawBinaryDictionary();
}
|