加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 站长学院 > MsSql教程 > 正文

[编程实例] Lucene Demo 小实例

发布时间:2020-12-12 16:01:44 所属栏目:MsSql教程 来源:网络整理
导读:首先是文件索引:先用爬虫爬几个页面放在特定目录下,我挑取了 google、baidu、yahoo 的代码来做例子 :) Writer.java:public class Writer { public static void main(String[] args) throws Exception { // fileDir is ……
Table users
PK id | name | pass | updatetime
DBIndexer.java
?DBIndexer?

private?String?drivercom.microsoft.sqlserver.jdbc.SQLServerDriver;

????
?String?urljdbc:sqlserver://localhost:1433;databaseName=mydb;?String?usersa?String?pass123456?Connection?conn?Statement?st?ResultSet?rs?String?indexUrlD:/work/index/mydb;


????
?ResultSet?getResult()??Exception{????????

????????
try?{

????????????Class.forName(driver);

????????????conn?
?DriverManager.getConnection(url,?user,?pass);

????????????String?sql?
select?*?from?users;

????????????st?
?conn.createStatement();

????????????rs?
?st.executeQuery(sql);

????????????while?(rs.next())?{

????????????????System.out.print(rs.getInt("id")?+?" ");

????????????????System.out.print(rs.getString("name")?+?" ");

????????????????System.out.print(rs.getString("pass")?+?" ");

????????????????System.out.print(rs.getDate("updatetime")?+?" ");

????????????}

????????}

????????
catch?(Exception?e)?{

????????????e.printStackTrace();

????????}

????????
return?rs;

????}


????
?executeIndex(ResultSet?rs,?IndexWriter?indexWriter)??Exception?i;

????????
while(rs.next()){

????????????
?id??rs.getInt(id);

????????????String?name?
?rs.getString(name);

????????????String?time?
updatetime);

????????????


????????????Document?doc?
?Document();


????????????Field?idField

????????????Field?nameField

????????????Field?timeField
time


????????????doc.add(idField);

????????????doc.add(nameField);

????????????doc.add(timeField);


????????????indexWriter.addDocument(doc);


????????????i
;

????????}
this.close();

????????System.out.println(
共处理记录:i);

????}


????
?close()??Exception.rs.close();

????????
.st.close();

????????
.conn.close();

????}


????
?createIndex()??Exception?get?data?ResultSet????????ResultSet?rs.getResult();


????????Analyzer?chineseAnalyzer?=?new?ChineseAnalyzer();????????Analyzer?chineseAnalyzer??IndexWriter(.indexUrl,?chineseAnalyzer,0);">);

????????indexWriter.setMergeFactor(
100);

????????indexWriter.setMaxBufferedDocs(
);


????????java.util.Date?startDate
?java.util.Date();


????????System.out.println(
开始索引时间:?startDate);


????????executeIndex(rs,?indexWriter);


????????indexWriter.optimize();


????????indexWriter.close();


????????java.util.Date?endDate
索引结束时间:?endDate);

????????System.out.println(
共花费:?(endDate.getTime()startDate.getTime())?ms);

????}

????

????
?main?(String?args[])??Exception?{

????????DBIndexer?oIndexer?
?DBIndexer();

????????oIndexer.createIndex();

????}

}

DBSearcher.java

?DBSearcher?{

????

????
;


????
?Exception?{

????????

????????
/*建立索引代码,查找时注释*/Index?index=new?Index();

????????
index.createIndex();

????????File?indexDir?
?File(indexUrl);

????????FSDirectory?fdir?
?FSDirectory.getDirectory(indexDir);


????????IndexSearcher?searcher?
?IndexSearcher(fdir);


????????
对中文建立解析(必须)

?StandardAnalyzer();

????????QueryParser?parser?

????????Query?query?
?parser.parse(石头);


????????Date?startDate?
?Date();

????????System.out.println(
检索开始时间:startDate);


????????Hits?result?
?searcher.search(query);


????????
;?iresult.length();?i){

????????????Document?doc?
?result.doc(i);

????????????System.out.println(
用户ID:?doc.get()??更新时间:));

????????}


????????Date?endDate
?Date();


????????System.out.println(
共有记录:?result.length());

????????System.out.println(


}

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

首先是文件索引:

先用爬虫爬几个页面放在特定目录下,我挑取了 google,baidu,yahoo 的代码来做例子:)

Writer.java

public ? class ?Writer?

{

????
public?staticvoid?main(String[]?args)?throws?Exception

????
{

????????
//fileDir?is?the?directory?that?contains?the?text?files?to?be?indexed

????????File?fileDir?=new?File("D:/work/source);


????????
indexDir?is?the?directory?that?hosts?Lucene's?index?files????????File?indexDir?D:/work/index);

????????Analyzer?luceneAnalyzer?
?StandardAnalyzer();

????????IndexWriter?indexWriter?
?IndexWriter(indexDir,luceneAnalyzer,true);

????????File[]?textFiles?
?fileDir.listFiles();

????????System.out.println(
Total?indexed?"+?textFiles.length?"?files?! );

????????

????????
long?startTime??Date().getTime();


????????
Add?documents?to?the?index

????????for(int?i?0;?i?<?textFiles.length;?i++){

????????????
if?(textFiles[i].isFile()?&&?textFiles[i].getName().endsWith(.htm))?{

????????????????System.out.println(
File??textFiles[i].getCanonicalPath()??is?being?indexed);

????????????????Reader?textReader?
?FileReader(textFiles[i]);

????????????????Document?document?
?Document();

????????????????document.add(
?Field(content,textReader));

????????????????document.add(
path

????????????????indexWriter.addDocument(document);

????????????}

????????}


????????indexWriter.optimize();

????????indexWriter.close();

????????
?endTime??Date().getTime();


????????System.out.println(
It?took??(endTime?-?startTime)

????????????
?milliseconds?to?create?an?index?for?the?files?in?the?directory?

????????????
?fileDir.getPath());

????}

}

Searcher.java
?Searcher?

final?String?path?;

????

????
?Exception

????
{

????????IndexSearcher?searcher?
?IndexSearcher(path);

????????Hits?hits?
null;

????????Query?query?
;

????????QueryParser?qp?
?QueryParser(?StandardAnalyzer());

????????

????????String?searchText?
yahoo?job?google?baidu;

????????

????????query?
?qp.parse(searchText);

????????hits?
?searcher.search(query);

????????System.out.println(
Search?"?searchText?"?total??hits.length()??result?! );

????????
?(Iterator?it??hits.iterator();?it.hasNext();?)?{

????????????Hit?hit?
?(Hit)?it.next();

????????????System.out.println(hit.getDocument().getField(
).stringValue());

????????}

????}

}
?
以下是一个 DB 索引的例子,大家可以看看:

数据库环境:SQL Server 2005
数据库名称:mydb
数据库表:users
表结构:
    推荐文章
      热点阅读