用Lucene做一个简单的Java搜索工具
2007-12-23 12:27:00 来源:WEB开发网 核心提示: 初学Lucene,刚接触搜索引擎,用Lucene做一个简单的Java搜索工具,知道了一点点,想做个小工具,双击查询结果的某条记录可以打开相应文件,4、性能方面索引文件时,实现根据“单词”搜索某个java源文件,比如输入“String”去查询某些java源文件里用到了这个类
初学Lucene,刚接触搜索引擎。知道了一点点,想做个小工具,实现根据“单词”搜索某个java源文件。比如输入“String”去查询哪些java源文件里用到了这个类。
这个想法的来源是,在以前刚学java时,有一本java基础教程的书的附带光盘里有作者写的一个程序,可以方便初学者查找某些类在哪个实例里出现。当时没有太在意,觉得作者的代码很长。所以现在想自己也写一个这样的小程序。
开发工具与运行环境:使用Lucene2.0的包,jdk1.5,在WindowsXP下运行。
思路分析与设计:
整个程序里,除了Lucene的必要操作外,就是IO的基本操作了。因为要对某目录下及其子目录下的所有Java源文件进行索引,就要用到递归,同时要过滤掉非Java源文件。根据这种情况,设计了以下5个类。
主类:索引类(IndexJavaFiles),搜索类(SearchJavaFiles) 异常类:索引异常类(IndexException),搜索异常类(SearchException) 还有一个文件过滤工厂类(FileFilterFactory)。
异常类不是必要的,特意设计来包装IO异常、文件异常和Lucene的异常。文件过滤工厂类的出现并不是故弄玄虚,只是不想太多代码集中在一起,就把文件过滤器的设计放到一个类里。下面是程序的完整代码及注释。
// IndexJavaFiles.java
/**
 * Index the Java source files.
 */
package powerwind;

import java.io.*;
import java.util.Date;

import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;

/**
 * Adds Java source files to a Lucene index.
 *
 * <p>The public entry point is {@link #index(IndexWriter, File, FileFilter)};
 * it delegates to the private {@code indexDirectory(IndexWriter, File, FileFilter)}
 * to walk a directory tree and index the files in it.
 *
 * @author Powerwind
 * @version 1.0
 */
public class IndexJavaFiles {

    /**
     * Default constructor.
     */
    public IndexJavaFiles() {
    }

    /**
     * Recursively indexes {@code file}. Called by {@code index}, which only
     * passes directories; the recursion then reaches the individual files.
     *
     * @param writer the index writer that receives the documents
     * @param file   the directory (or, during recursion, the file) to index
     * @param filter selects which directory entries are visited
     * @throws IndexException if a file cannot be read or added to the index
     */
    private void indexDirectory(IndexWriter writer, File file, FileFilter filter)
            throws IndexException {
        if (!file.isDirectory()) {
            // Files reaching this point were already accepted by the filter
            // when their parent directory was listed.
            addFile(writer, file);
            return;
        }
        // Only entries accepted by the filter are listed.
        File[] files = file.listFiles(filter);
        // listFiles returns null when the directory cannot be listed.
        if (files == null) {
            return;
        }
        for (int i = 0; i < files.length; i++) {
            indexDirectory(writer, files[i], filter);
        }
    }

    /**
     * Indexes {@code file}: a single file is indexed directly if the filter
     * accepts it, a directory is handed to {@code indexDirectory} for
     * recursive processing.
     *
     * <p>If the path does not exist, is not readable, or is a file rejected
     * by the filter, a message is printed and nothing is indexed.
     *
     * @param writer the index writer that receives the documents
     * @param file   the file or directory to index
     * @param filter decides which files are indexed
     * @throws IndexException if a file cannot be read or added to the index
     */
    public void index(IndexWriter writer, File file, FileFilter filter) throws IndexException {
        // A missing or unreadable path used to be ignored silently; report it
        // the same way as a rejected file.
        if (!file.exists() || !file.canRead()) {
            System.out.println("指定文件或目录错误,没有完成索引");
            return;
        }
        if (file.isDirectory()) {
            indexDirectory(writer, file, filter);
        } else if (filter.accept(file)) {
            addFile(writer, file);
        } else {
            System.out.println("指定文件或目录错误,没有完成索引");
        }
    }

    /**
     * Adds one file to the index and reports it on standard output.
     */
    private void addFile(IndexWriter writer, File file) throws IndexException {
        try {
            writer.addDocument(parseFile(file));
            System.out.println("增加文件: " + file);
        } catch (IOException ioe) {
            throw wrap(ioe);
        }
    }

    /**
     * Turns a {@link File} into a Lucene {@link Document} with a stored,
     * untokenized {@code path} field and a {@code contents} field read from
     * the file.
     *
     * @param file the file to convert
     * @return the document describing {@code file}
     * @throws IndexException if the file cannot be opened
     */
    private Document parseFile(File file) throws IndexException {
        Document doc = new Document();
        doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
        try {
            doc.add(new Field("contents", new FileReader(file)));
        } catch (FileNotFoundException fnfe) {
            throw wrap(fnfe);
        }
        return doc;
    }

    /**
     * Wraps a low-level exception in an {@link IndexException}, keeping the
     * original as the cause so its stack trace is not lost.
     */
    private static IndexException wrap(Exception cause) {
        IndexException wrapped = new IndexException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }
}
下面是IndexException异常类。 IndexException.java package powerwind; publicclass IndexException extends Exception { public IndexException(String message) { super("Throw IndexException while indexing files: " + message); } } 下面是FileFilterFactory类,返回一个特定的文件过滤器(FileFilter)。 FileFilterFactory.java package powerwind; import java.io.*; publicclass FileFilterFactory { /** *静态匿名内部类 */ privatestatic FileFilter filter = new FileFilter() { publicboolean accept(File file) { long len; return file.isDirectory() (file.getName().endsWith(".java") && ((len = file.length()) > 0) && len < 1024 * 1024); } }; publicstatic FileFilter getFilter() { returnfilter; } } main方法 /** * main方法 */ publicstaticvoid main(String[] args) throws Exception { IndexJavaFiles ijf = new IndexJavaFiles(); Date start = new Date(); try { IndexWriter writer = IndexWriterFactory.newInstance().createWriter("./index", true); System.out.println("Indexing ..."); ijf.index(writer, new File("."), FileFilterFactory.getFilter()); System.out.println("Optimizing..."); writer.optimize(); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } } SearchJavaFiles.java package powerwind; import java.io.*; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.*; import org.apache.lucene.search.*; publicclass SearchJavaFiles { private IndexSearcher searcher; private QueryParser parser; /** * *@paramsearcher */ public SearchJavaFiles(IndexSearcher searcher) { this.searcher = searcher; } /** * *@paramfield *@paramanalyzer */ publicvoid setParser(String field, Analyzer analyzer) { setParser(new QueryParser(field, analyzer)); } /** *@paramparser */ publicvoid setParser(QueryParser parser) { 
this.parser = parser; } /** * *@paramquery *@returnHits *@throwsSearchException */ public Hits serach(Query query) throws SearchException { try { returnsearcher.search(query); } catch (IOException ioe) { thrownew SearchException(ioe.getMessage()); } } /** * *@paramqueryString *@returnHits *@throwsSearchException */ public Hits serach(String queryString) throws SearchException { if (parser == null) thrownew SearchException("parser is null!"); try { returnsearcher.search(parser.parse(queryString)); } catch (IOException ioe) { thrownew SearchException(ioe.getMessage()); } catch (ParseException pe) { thrownew SearchException(pe.getMessage()); } } /** * *输出hits的结果,从start开始到end,不包括end * *@paramhits *@paramstart *@paramend *@throwsSearchException */ publicstatic Hits display(Hits hits, int start, int end) throws SearchException { try { while (start < end) { Document doc = hits.doc(start); String path = doc.get("path"); if (path != null) { System.out.println((start + 1) + "- " + path); } else { System.out.println((start + 1) + "- " + "No such path"); } start++; } } catch (IOException ioe) { thrownew SearchException(ioe.getMessage()); } return hits; } 进入讨论组讨论。
main方法 /** *@paramargs */ publicstaticvoid main(String[] args) throws Exception { String field = "contents"; String index = "./index"; finalint rows_per_page = 2; finalchar NO = 'n'; SearchJavaFiles sjf = new SearchJavaFiles(new IndexSearcher(IndexReader.open(index))); sjf.setParser(field, new StandardAnalyzer()); BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); while (true) { System.out.println("Query: "); String line = in.readLine(); if (line == null line.length() < 2) { System.out.println("eixt query"); break; } Hits hits = sjf.serach(line); System.out.println("searching for " + line + " Result is "); int len = hits.length(); int i = 0; if (len > 0) while (true) { if (i + rows_per_page >= len) { SearchJavaFiles.display(hits, i, len); break; } else { SearchJavaFiles.display(hits, i, i += rows_per_page); System.out.println("more y/n?"); line = in.readLine(); if (line.length() < 1 line.charAt(0) == NO) break; } } else System.out.println("not found"); } } } SearchException.java package powerwind; publicclass SearchException extends Exception { public SearchException(String message) { super("Throw SearchException while searching files: " + message); } }
(出处:http://www.cncms.com)
赞助商链接