本人最近整合了从文本中提取信息,并将之保存到新的文本中的方法,在此处用到的是正则表达式,希望大家一起学习.
文件操作类:fileOperation.java
package dyx_13;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;

/**
 * Small collection of text-file helpers: create, read, overwrite, append and clear.
 *
 * <p>Failures are reported with {@code printStackTrace()} and signalled through the
 * return value where the method has one; only {@link #clear(String)} lets an
 * {@link IOException} propagate.
 */
public class fileOperation {

    /**
     * Makes sure {@code fileName} exists, creating an empty file when it does not.
     * (The method name is misspelled; it is kept so existing callers still compile.)
     *
     * @param fileName file to create if missing
     * @return {@code true} if the file exists when the method returns; {@code false}
     *         if {@code fileName} is {@code null} or the file could not be created
     */
    public boolean crateFile(File fileName) {
        if (fileName == null) {
            return false;
        }
        try {
            if (!fileName.exists()) {
                fileName.createNewFile();
            }
            // Report the real outcome instead of an unconditional "true".
            return fileName.exists();
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads a whole text file using the platform default charset.
     *
     * @param fileName file to read
     * @return the file content with every line terminated by {@code '\n'} (an empty
     *         string for an empty file), or {@code null} if {@code fileName} is
     *         {@code null} or the file could not be read
     */
    public static String readTxtFile(File fileName) {
        if (fileName == null) {
            return null;
        }
        try {
            return readAll(fileName);
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Overwrites {@code fileName} with {@code content}, encoded as GBK.
     *
     * @param content  text to write
     * @param fileName destination file; it is truncated first
     * @return {@code true} if the content was written and the stream closed cleanly,
     *         {@code false} on a {@code null} argument or any I/O failure
     */
    public boolean writeTxtFile(String content, File fileName) {
        if (content == null || fileName == null) {
            return false;
        }
        try (FileOutputStream out = new FileOutputStream(fileName)) {
            out.write(content.getBytes("GBK"));
            return true;
        } catch (IOException | SecurityException e) {
            // Also reached when close() fails, so "true" is never reported for a bad write.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Appends {@code content} followed by a CRLF line break to the file at
     * {@code filePath}, using the platform default charset.
     *
     * <p>This is best-effort: I/O failures are printed, not rethrown. The
     * {@code throws} clause is kept only for source compatibility with callers.
     *
     * @param filePath path of the file to append to; created if it does not exist
     * @param content  text of the line to append
     * @throws IOException never thrown by this implementation (declared for compatibility)
     */
    public static void write(String filePath, String content) throws IOException {
        File target = new File(filePath);
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target, true)))) {
            out.write(content + "\r\n");
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
        }
    }

    /**
     * Truncates the file at {@code path} to zero length, creating it if necessary.
     *
     * @param path path of the file to empty
     * @throws IOException if the file cannot be opened or written
     */
    public static void clear(String path) throws IOException {
        try (FileWriter writer = new FileWriter(new File(path))) {
            writer.write("");
        }
    }

    /**
     * Ensures both files exist, reads the source file, and leaves the target file empty.
     *
     * <p>The steps that would write extracted data into the target are disabled in
     * the original listing, so the only effect on the target is that it is created
     * or truncated.
     *
     * @param filePath1 path of the source text file
     * @param filePath2 path of the target text file
     */
    public static void contentToTxt(String filePath1, String filePath2) {
        if (filePath1 == null || filePath2 == null) {
            System.err.println("contentToTxt: both file paths are required");
            return;
        }
        try {
            File source = new File(filePath1);
            ensureExists(source, "源文件");

            File target = new File(filePath2);
            // The original created the *source* again here when the target was missing.
            ensureExists(target, "目标文件");

            // Content is read but not used: the extraction steps are disabled.
            readAll(source);

            // Opening a FileWriter without append mode truncates the target.
            try (BufferedWriter output = new BufferedWriter(new FileWriter(target))) {
                output.flush();
            }
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
        }
    }

    /** Reads every line of {@code file}, joining them with a trailing {@code '\n'} each. */
    private static String readAll(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /** Prints whether {@code file} exists (prefixed by {@code label}) and creates it if missing. */
    private static void ensureExists(File file, String label) throws IOException {
        if (file.exists()) {
            System.out.println(label + "存在");
        } else {
            System.out.println(label + "不存在");
            file.createNewFile();
        }
    }
}
正则表达式处理:regExp.java
package dyx_13; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; public class regExp { public static String email(String filePath,String content) throws IOException { String regex = "w+@w+(.w+)+"; String email = null; Matcher m=Pattern.compile(regex).matcher(content); while(m.find()){ email = m.group(); fileOperation.write(filePath,email); System.out.println(email); } return email; } // 电话号码 public static String tele(String filePath,String content) throws IOException { String telephone = null; String regex = "0?(13[0-9]|15[012356789]|18[0236789]|14[57])[0-9]{8}"; Matcher m=Pattern.compile(regex).matcher(content); while(m.find()){ telephone = m.group(); fileOperation.write(filePath,telephone); System.out.println(telephone); } return telephone; } // 网址 public static String netAddress(String filePath,String content) throws IOException { String netAdd = null; String regex = "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]" ; Matcher m=Pattern.compile(regex).matcher(content); while(m.find()){ netAdd = m.group(); fileOperation.write(filePath,netAdd); System.out.println(netAdd); } return netAdd; } // 身份证 public static String idCard(String filePath,String content) throws IOException { String id = null; String regex = "(d{14}w)|d{17}w" ; Matcher m=Pattern.compile(regex).matcher(content); while(m.find()){ id = m.group(); fileOperation.write(filePath,id); System.out.println(id); } return id; } } 运行主程序zhu.java
package dyx_13; import java.io.File; import java.io.IOException; public class zhu { /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { // TODO Auto-generated method stub //o.contentToTxt(s1,s2);fileOperation o = new fileOperation(); String s1 = "D:java workspacedyxJAVA classdyx_2contentfile.txt"; String s2 = "D:java workspacedyxJAVA classdyx_2contentoutfile.txt"; fileOperation.clear(s2); File f = new File(s1); String s = fileOperation.readTxtFile(f); System.out.println(s); regExp.email(s2,s); regExp.tele(s2,s); regExp.netAddress(s2,s); regExp.idCard(s2,s); } } (编辑:李大同)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容!
|