Java 字符串词频统计实例代码
发布时间:2020-12-14 05:21:41 所属栏目:Java 来源:网络整理
导读:复制代码 代码如下: package com.gpdi.action; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; public class WordsStatistics { class Obj { int count ; Obj(int cou
复制代码 代码如下: package com.gpdi.action; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; public class WordsStatistics { class Obj { int count ; Obj(int count){ this.count = count; } } public List<WordCount> statistics(String word) { List<WordCount> rs = new ArrayList<WordCount>(); Map <String,Obj> map = new HashMap<String,Obj>(); if(word == null ) { return null; } word = word.toLowerCase(); word = word.replaceAll("'s",""); word = word.replaceAll(",",""); word = word.replaceAll("-",""); word = word.replaceAll(".",""); word = word.replaceAll("'",""); word = word.replaceAll(":",""); word = word.replaceAll("!",""); word = word.replaceAll("n",""); String [] wordArray = word.split(" "); for(String simpleWord : wordArray) { simpleWord = simpleWord.trim(); if (simpleWord != null && !simpleWord.equalsIgnoreCase("")) { Obj cnt = map.get(simpleWord); if ( cnt!= null ) { cnt.count++; }else { map.put(simpleWord,new Obj(1)); } } } for(String key : map.keySet()) { WordCount wd = new WordCount(key,map.get(key).count); rs.add(wd); } Collections.sort(rs,new java.util.Comparator<WordCount>(){ @Override public int compare(WordCount o1,WordCount o2) { int result = 0 ; if (o1.getCount() > o2.getCount() ) { result = -1; }else if (o1.getCount() < o2.getCount()) { result = 1; }else { int strRs = o1.getWord().compareToIgnoreCase(o2.getWord()); if ( strRs > 0 ) { result = 1; }else { result = -1 ; } } return result; } }); return rs; } public static void main(String args[]) { String word = "Pinterest is might be aa ab aa ab marketer's dream - ths site is largely used to curate products " ; WordsStatistics s = new WordsStatistics(); List<WordCount> rs = s.statistics(word); for(WordCount word1 : rs) { System.out.println(word1.getWord()+"*"+word1.getCount()); } } } (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |