开发学院 WEB开发 Jsp A simple example about full-text search based ... 阅读

A simple example of full-text search in Java, based on Lucene

　2008-01-05 08:50:37　来源：WEB开发网　　　

索引源代码：
package lUCene;

/**
* Title: 
* Description: 
* Copyright: Copyright (c) 2003
* Company: 
* @author Shirley
* @version 1.0
*/

import org.apache.lucene.index.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import org.apache.lucene.document.*;

public class IndexFiles {
　//使用方法：IndexFiles [索引输出目录] [索引的文件列表] ...
　public static void main(String[] arg) throws Exception {
　　String[] args = new String[2];
　　//索引后存放索引信息的路径
　　args[0] = System.getPRoperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "index-1";
　　//待索引文件
　　args[1] = "E:\\AppWork\\lucene\\rfc2047.txt";
　　args[2] = "E:\\AppWork\\cyberOffice\\CO\\Sheldon Java Mail.htm";
　　args[3] = "E:\\AppWork\\lucene\\englishtest.doc";
　　args[4] = "E:\\AppWork\\cyberoffice\\CO\\xls1.xls";
　　args[5] = "E:\\AppWork\\cyberoffice\\CO\\ppt1.ppt";

　　String indeXPath = args[0];
　　IndexWriter writer;
　　//用指定的语言分析器构造一个新的写索引器（第3个参数表示是否为追加索引）
　　writer = new IndexWriter(indexPath, new SimpleAnalyzer(), false);

　　for (int i=1; i<args.length; i++) {
　　　System.out.println("Indexing file " + args[i]);
　　　InputStream is = new FileInputStream(args[i]);

　　　//构造包含2个字段Field的Document对象
　　　//一个是路径path字段，不索引，只存储
　　　//一个是内容body字段，进行全文索引，并存储
　　　Document doc = new Document();
　　　doc.add(Field.UnIndexed("path", args[i]));
　　　doc.add(Field.Text("body", (Reader) new InputStreamReader(is)));
　　　//将文档写入索引