资讯详情

FST的简单应用

package fst;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

classFSTDic{

FSTfst;

FST.BytesReaderfstReader;

publicFSTDic()throwsIOException{

Filefile=newFile("fst");

if(file.exists()){

fst=load(file);

}else{

Listwords=newArrayList();

words.add("中国");

words.add("中国人");

words.add("中国人民");

words.add("中国人民解放军");

fst=build(words);

}

fstReader=fst.getBytesReader();

}

publicvoidsave()throwsIOException{

fst.save(newFile("fst"));

}

publicFSTload(Filefile)throwsIOException{

returnnewFST(newInputStreamDataInput(newFileInputStream("fst")),ByteSequenceOutputs.getSingleton());

}

privateFSTbuild(Listwords)throwsIOException{

ByteSequenceOutputsoutputs=ByteSequenceOutputs.getSingleton();

Builderbuilder=newBuilder(FST.INPUT_TYPE.BYTE4,outputs);

finalIntsRefscratchIntsRef=newIntsRef();

BytesRefoutput=newBytesRef(4);

for(Stringword:words){

NumericUtils.intToPrefixCodedBytes(word.length(),0,output);

builder.add(Util.toUTF32(word,scratchIntsRef),BytesRef.deepCopyOf(output));

}

returnbuilder.finish();

}

publicbooleancontains(Stringword)throwsIOException{

FST.ArcscratchArc=newFST.Arc();

intbufUpto=0,buflen=word.length();

BytesRefpendingOutput=fst.outputs.getNoOutput();

BytesRefmatchOutput=null;

fst.getFirstArc(scratchArc);

while(bufUpto

intcodePoint=Character.codePointAt(word,bufUpto);

if(fst.findTargetArc(codePoint,scratchArc,scratchArc,fstReader)!=null){

pendingOutput=fst.outputs.add(pendingOutput,scratchArc.output);

}else{

break;

}

bufUpto =Character.charCount(codePoint);

}

if(scratchArc.isFinal()){

matchOutput=fst.outputs.add(pendingOutput,scratchArc.nextFinalOutput);

intlen=NumericUtils.prefixCodedToInt(matchOutput);

System.out.println(len);

returntrue;

}

returnfalse;

}

publicstaticvoidmain(String[]args)throwsIOException{

FSTDicdic=newFSTDic();

//dic.save();

System.out.println(dic.contains("中国"));

System.out.println(dic.contains("中国人"));

System.out.println(dic.contains("中国人民"));

System.out.println(dic.contains("中国人民解放军"));

}

}

(随记,稍后补齐……)

标签: 荔波智能电容器fst

锐单商城拥有海量元器件数据手册和IC替代型号,打造电子元器件IC百科大全!

锐单商城 - 一站式电子元器件采购平台