package com.rokid.server.framework.test;
import java.io.IOException; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.RAMDirectory;
public class Example {
public static IndexWriter writer; public static RAMDirectory idxDir; public static SmartChineseAnalyzer analyzer;
public static void makeIndex() throws IOException {
FSDirectory fsDir = FSDirectory.open(Paths.get("C:\\Users\\gt\\Desktop\\_ROKID_\\fast-sent-match\\example")); idxDir = new RAMDirectory(fsDir, IOContext.DEFAULT); analyzer = new SmartChineseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setSimilarity(new BM25Similarity());
iwc.setOpenMode(OpenMode.CREATE); writer = new IndexWriter(idxDir, iwc);
List listSent = new ArrayList(); listSent.add("战旗春秋是最好玩的战棋游戏"); listSent.add("金古江湖是最好玩的金庸游戏"); for (String sent : listSent) { Document doc = new Document(); doc.add(new TextField("questionID", String.valueOf(1), Field.Store.YES)); doc.add(new TextField("question", sent.trim(), Field.Store.YES)); writer.addDocument(doc); }
List listSent2 = new ArrayList(); listSent2.add("战旗春秋是最好玩的手机游戏"); listSent2.add("金古江湖是最好玩的角色扮演游戏"); for (String sent : listSent2) { Document doc = new Document(); doc.add(new TextField("questionID", String.valueOf(2), Field.Store.YES)); doc.add(new TextField("question", sent.trim(), Field.Store.YES)); writer.addDocument(doc); }
writer.commit();
int nMaxDoc = writer.maxDoc(); System.out.println("nMaxDoc = " + nMaxDoc); writer.close(); }
public static void main(String[] args) throws IOException, ParseException { makeIndex();
// String[] stringQuery={"2","金古江湖"}; //通过1,2来筛选 String[] stringQuery = { "1", "金古江湖" }; String[] fields = { "questionID", "question" }; Occur[] occ = { Occur.MUST, Occur.MUST }; Query query = MultiFieldQueryParser.parse(stringQuery, fields, occ, analyzer);
TopDocs results = null; IndexReader reader = DirectoryReader.open(idxDir); IndexSearcher searcher = new IndexSearcher(reader); results = searcher.search(query, 3); ScoreDoc[] hits = results.scoreDocs; for (int i = 0; i < hits.length; ++i) { Document doc = searcher.doc(hits[i].doc); String strDocSent = doc.get("question"); System.out.println(strDocSent); } } }