SetString set = new TreeSetString();
for(String word: rawWords){
set.add(word);
}
Iterator ite = set.iterator();
ListString wordsList = new ArrayListString();
ListInteger freqList = new ArrayListInteger();
//多少个字符串未知,所以用list来保存先
while(ite.hasNext()){
String word = (String) ite.next();
int count = 0;//统计相同字符串的个数
for(String str: rawWords){
if(str.equals(word)){
count++;
}
}
wordsList.add(word);
freqList.add(count++);
}
//存入数组当中
words = wordsList.toArray(new String[0]);
wordFreqs = new int[freqList.size()];
for(int i = 0; ifreqList.size(); i++){
wordFreqs[i] = freqList.get(i);
}
}
//根据词频 , 将词数组和词频数组进行降序排序
public void sort() {
class Word{
private String word;
private int freq;
public Word(String word, int freq){
this.word = word;
this.freq = freq;
}
}
//注意:此处排序 , 1)首先按照词频降序排列,2)如果词频相同,按照字母降序排列,
//如 'abc''ab' 'aa'
class WordComparator implements Comparator{
public int compare(Object o1, Object o2) {
Word word1 = (Word) o1;
Word word2 = (Word) o2;
if(word1.freqword2.freq){
return 1;
}else if(word1.freqword2.freq){
return -1;
}else{
int len1 = word1.word.trim().length();
int len2 = word2.word.trim().length();
【java代码词频 java词频统计】String min = len1len2? word2.word: word1.word;
String max = len1len2? word1.word: word2.word;
for(int i = 0; imin.length(); i++){
if(min.charAt(i)max.charAt(i)){
return 1;
}
}
return 1;
}
}
}
List wordList = new ArrayListWord();
for(int i = 0; iwords.length; i++){
wordList.add(new Word(words[i], wordFreqs[i]));
}
Collections.sort(wordList, new WordComparator());
for(int i = 0; iwordList.size(); i++){
Word wor = (Word) wordList.get(i);
words[i] = wor.word;
wordFreqs[i] = wor.freq;
}
}
/**
 * Prints the results to standard output: first a summary line with the number
 * of entries in {@code words}, then one line per entry consisting of the
 * frequency immediately followed by the word.
 */
public void printResult() {
    System.out.println("Total " + words.length + " different words in the content!");
    for (int i = 0; i < words.length; i++) {
        System.out.println(wordFreqs[i] + "" + words[i]);
    }
}
/**
 * Exercises the class end to end: creates an {@code Article} and calls
 * {@code splitWord}, {@code countWordFreq}, {@code sort} and
 * {@code printResult} on it, in that order.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final Article article = new Article();

    article.splitWord();
    article.countWordFreq();
    article.sort();
    article.printResult();
}
}
-----------------------
Total 99 different words in the content!
5and
4the
4i
4a
3as
2with
2who
2to
2time
2sverak
2son
2s
2old
2of
2it
2in
2his
2czech
1zdenek
1year
1wrote
1writing
1won
1whining
1while
1wanted
1walked
1ve
1values
1though
1this
1these
1that
1than
1taking
1subtitles
1spend
1some
1so
1seen
1script
1saw
1russian
1richest
1remain
1rather
1production
1plays
1oscar
1one
1not
1more
1m
1likely
1life
1language
1kolya
1jan
1is
1increasingly
1impacted
1if
1higher
1high
1he
1golden
1globe
1foreign
1for
1five
1finds
1films
1film
1father
1english
1ends
1dramas
1directed
1delight
1days
1couple
1confirmed
1comparable
1characters
推荐阅读
- excel如何删除重复的值,excel如何删除重复值的颜色
- 微信如何运营新媒体,微信新媒体营销特点
- 区块链沙雕事件,区块链沙龙
- 浪漫碎片无人直播,浪漫碎片无人直播是真的吗
- 查看python所有函数 python查看函数方法
- 汽车运货公司java代码,货运系统源码
- flutter找不到路由,flutter搜索功能
- 深圳捕鱼游戏开发公司,深圳捕鱼游戏开发公司有哪些
- len函数python len函数python返回值float