Lucene 是一个纯 Java 实现的全文检索工具库,自带的分词器对中文分词支持不好,需要借助其他分词组件,如 IKAnalyzer、Paoding、JE 等。
Lucene 的基本流程:首先建立索引(index),然后对索引进行搜索(search)。
下面给出一个入门的例子:
/**
 * Introductory round trip: indexes a single document into an in-memory
 * directory, then searches for the term "text" and verifies that the
 * stored field value can be read back from every hit.
 *
 * @throws CorruptIndexException     if the index is corrupt
 * @throws LockObtainFailedException if the index write lock cannot be obtained
 * @throws IOException               on a low-level I/O error
 * @throws ParseException            if the query string cannot be parsed
 */
public void testIndexAndSearchold() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.getDirectory("/tmp/testindex");
    try {
        // ---- Index phase ----
        IndexWriter iwriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
        try {
            Document doc = new Document();
            String text = "This is the text to be indexed.";
            doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
            iwriter.addDocument(doc);
            iwriter.optimize();
        } finally {
            // Close the writer even if indexing fails, so the write lock is released.
            iwriter.close();
        }

        // ---- Search phase ----
        IndexSearcher isearcher = new IndexSearcher(directory);
        try {
            // Parse a simple query that searches for "text":
            QueryParser parser = new QueryParser("fieldname", analyzer);
            Query query = parser.parse("text");

            // Keep up to 10 top hits; a capacity of 0 would only count matches
            // and retain no document ids to look up afterwards.
            TopDocCollector hits = new TopDocCollector(10);
            isearcher.search(query, hits);
            assertEquals(1, hits.getTotalHits());

            // Fetch the collected hits once and keep the array.
            org.apache.lucene.search.ScoreDoc[] scoreDocs = hits.topDocs().scoreDocs;

            // Iterate through the results, resolving each hit by its real
            // document id rather than by the loop counter.
            for (int i = 0; i < scoreDocs.length; i++) {
                Document hitDoc = isearcher.doc(scoreDocs[i].doc);
                assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
            }
        } finally {
            isearcher.close();
        }
    } finally {
        directory.close();
    }
}