http://qinwenfeng.com/jiebaR/
# Minimal jiebaR demo: create a segmentation worker with default settings
# and split one Chinese sentence into words.
library(jiebaR)

demo_worker <- worker()
segment("今天天气好晴朗", demo_worker)
# Flag, for every row of R/table-A.csv, which segmented words occur in its
# BAK_TXT column, and write the widened table to rs-words.csv.
#
# Steps:
#   1. Segment every BAK_TXT value with a jiebaR worker.
#   2. Keep the distinct words that contain no ASCII letter or digit.
#   3. Append one "Y"/"N" column per kept word (column name = the word).
#   4. Write the resulting table as CSV.
library(jiebaR)
library(sqldf)  # not referenced in the visible script; kept as a dependency

TA <- read.csv("R/table-A.csv", header = TRUE, sep = ",")
stopifnot("BAK_TXT" %in% names(TA))
TA$BAK_TXT <- as.character(TA$BAK_TXT)

wkr <- worker()

# Segment each text on its own and pool all tokens into one character vector.
# lapply + unlist avoids re-copying the vector on every append.
segmented <- lapply(TA$BAK_TXT, function(txt) segment(txt, wkr))
words <- as.character(unlist(segmented, use.names = FALSE))

# Distinct tokens in sorted order (the same order table() would give them).
vocab <- sort(unique(words))

# Drop tokens containing any ASCII letter or digit. A logical mask is used
# instead of negative row indices: x[-integer(0)] selects nothing, so the
# index form would discard every word when no token matches.
has_ascii_alnum <- grepl("[A-Za-z0-9]", vocab)
keywords <- vocab[!has_ascii_alnum]

# One "Y"/"N" column per keyword, computed for all rows at once.
# fixed = TRUE matches the token literally rather than as a regular
# expression, so tokens with regex metacharacters cannot break the match.
if (length(keywords) > 0) {
  flag_cols <- lapply(keywords, function(word) {
    hit <- grepl(word, TA$BAK_TXT, fixed = TRUE)
    ifelse(hit, "Y", "N")
  })
  names(flag_cols) <- keywords

  # check.names = FALSE keeps the words themselves as column names.
  flags <- data.frame(flag_cols, check.names = FALSE, stringsAsFactors = FALSE)
  TA <- cbind(TA, flags)
}

write.csv(TA, file = "rs-words.csv")
https://www.r-bloggers.com/r-function-of-the-day-table/
推荐阅读
- r语言|手把手(R语言文本挖掘和词云可视化实践)
- R语言从入门到机器学习|R语言rename重命名dataframe的列名实战:rename重命名dataframe的列名(写错的列名不会被重命名)
- R下载安装,Linux版
- R|不同方法的正态性检验及R语言实现
- 模型评估
- 信息增益率
- R - dplyr 包
- r语言|R中处理空间面板模型的包spdep的用法
- R语言|电力窃漏电用户自动识别 细节