import org.apache.lucene.util.fst.FST; //导入方法依赖package包/类
/**
*
* @param inputStream the input stream
* @param outputStream the output stream
* @throws IOException if compilation fails
*/
public void compile(InputStream inputStream, OutputStream outputStream) throws IOException {
final HashSet words = new HashSet<>();
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
String line;
String last = null;
StringBuilder stringBuilder = new StringBuilder();
while ((line = reader.readLine()) != null) {
if (line.indexOf('#') >= 0) {
continue;
}
line = pattern.split(line)[0].trim();
line = line.toLowerCase();
if (line.equals(last)) {
continue;
}
last = line;
/*
* Add the word to the hash set in left-to-right characters order and reversed
* for easier matching later on.
*/
stringBuilder.setLength(0);
stringBuilder.append(line);
final int len = stringBuilder.length();
stringBuilder.append('>');
words.add(new BytesRef(stringBuilder));
stringBuilder.setLength(len);
stringBuilder.reverse().append('
words.add(new BytesRef(stringBuilder));
}
reader.close();
final BytesRef [] all = new BytesRef[words.size()];
words.toArray(all);
Arrays.sort(all, BytesRef::compareTo);
final Object nothing = NoOutputs.getSingleton().getNoOutput();
final Builder builder = new Builder<>(INPUT_TYPE.BYTE4, NoOutputs.getSingleton());
final IntsRefBuilder intsRef = new IntsRefBuilder();
for (BytesRef bytesRef : all) {
intsRef.clear();
intsRef.copyUTF8Bytes(bytesRef);
builder.add(intsRef.get(), nothing);
}
final FST fst = builder.finish();
try (final OutputStreamDataOutput out = new OutputStreamDataOutput(outputStream)) {
fst.save(out);
}
}