# Shared resources: HOME > Resources > Shared resources
#
# [Talent donation] Word cloud
# admin 17-08-12 09:37 655
# Install the packages this script loads, but only the ones that are not
# already available, so re-running the script does not re-download and
# reinstall everything each time.
for (pkg in c("twitteR", "KoNLP", "wordcloud", "qgraph")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}


library("twitteR")

# Twitter API credentials are read from environment variables (for example
# set in ~/.Renviron) instead of being written into the script. Secrets kept
# in source code leak as soon as the file is shared; any key that has ever
# been published this way must be revoked and regenerated.
consumerKey <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumerSecret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
accessToken <- Sys.getenv("TWITTER_ACCESS_TOKEN")
accessTokenSecret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() returns "" for unset variables; fail early with a clear
# message rather than letting the OAuth handshake fail obscurely.
if (!all(nzchar(c(consumerKey, consumerSecret,
                  accessToken, accessTokenSecret)))) {
  stop(
    "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, ",
    "TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_TOKEN_SECRET ",
    "before running this script.",
    call. = FALSE
  )
}

setup_twitter_oauth(consumerKey, consumerSecret, accessToken,
                    accessTokenSecret)

# Search term, forced to UTF-8 so the query is encoded the same way
# regardless of the native locale.
keyword <- enc2utf8("오버워치")

# Fetch up to 10000 Korean-language tweets matching the keyword.
result <- searchTwitter(keyword, lang = "ko", n = 10000)




library(KoNLP)
library(wordcloud)
library(RColorBrewer)

# Patterns stripped from every tweet before noun extraction. They are joined
# into one alternation so a single gsub() pass handles all of them.
stuff_to_remove <- c(
  "http[s]?://[[:alnum:].\\/]+", # URLs
  "@[A-Za-z0-9_]*",              # @mentions; "[\\w]" inside a bracket
                                 # expression is not a word class in R's
                                 # default regex engine
  "<[^>]*>",                     # <...> spans; non-greedy so text between
                                 # two separate spans is kept
  "'s", "\\(", "\\)", "#", ":",
  "[ㄱ-ㅎ]"                      # stand-alone Hangul consonants (a range,
                                 # not the literal string "ㄱ-ㅎ")
)
stuff_to_remove <- paste(stuff_to_remove, collapse = "|")

# Pull the text out of every status object in `result`.
# vapply() guarantees a character vector even when the search came back empty.
result <- vapply(result, function(x) x$getText(), character(1))
head(result)

result <- gsub(stuff_to_remove, " ", result)
# Keep only Hangul syllables, digits and ASCII letters. "A-Za-z" is used
# because the range "A-z" also contains [ \ ] ^ _ and the backtick.
result <- gsub("[^가-힣0-9A-Za-z]", " ", result)
result <- gsub("RT", " ", result)
result <- gsub("https", " ", result)
head(result)

# One character vector of nouns per tweet. lapply() always returns a list;
# sapply() could collapse the result into a matrix or plain vector.
noun <- lapply(result, extractNoun)
names(noun) <- NULL
head(noun)

# Flatten the per-tweet noun lists into one character vector.
nounVec <- unlist(noun)

# Strip retweet / link residue first, and only then apply the length filter:
# filtering before the removal let strings that became empty or shorter than
# two characters slip into the frequency table.
nounVec <- gsub("RT", "", nounVec)
nounVec <- gsub("https", "", nounVec)
nounVec <- Filter(function(x) nchar(x) >= 2, nounVec)

# Frequency of each remaining noun; show the 100 most frequent.
nounFrame <- data.frame(nounVec)
nounFreq <- table(nounFrame)
head(sort(nounFreq, decreasing = TRUE), 100)

# Draw the word cloud: at most 100 words, each appearing at least 3 times,
# most frequent words placed first, colours picked at random from the palette.
pal <- brewer.pal(3, "Set2")
wordcloud(names(nounFreq), freq = nounFreq, scale = c(5.5, 1),
          rot.per = 0.5, min.freq = 3, random.order = FALSE,
          random.color = TRUE, colors = pal, max.words = 100)

# Second word cloud, built from a chat log text file.
# Print the working directory; the file below is opened by relative path.
getwd()
txt <- readLines("KakaoTalkChats3.txt", encoding = "UTF-8")
txt

# Apply the clean-up substitutions one after another, in this exact order.
txt <- Reduce(
  function(lines, rule) gsub(rule[["pattern"]], rule[["replacement"]], lines),
  list(
    # "<yyyy>년 <m>월 <d>일 <two Hangul syllables> <h>:<m>" timestamps
    c(pattern = "[0-9]{4}[년][ ][0-9]{1,2}[월][ ][0-9]{1,2}[일][ ][가-힣]{2}[ ][0-9]{1,2}[:][0-9]{1,2}",
      replacement = ""),
    # ", <non-space run> : " prefix (presumably the sender name)
    c(pattern = "[,][ ]\\S{1,}[ ][:][ ]", replacement = ""),
    # stand-alone Hangul consonants
    c(pattern = "[ㄱ-ㅎ]", replacement = " "),
    # everything outside the listed Hangul, digit and letter ranges
    c(pattern = "[^가-힣0-9A-z]", replacement = " "),
    # finally the letter range itself
    c(pattern = "[A-z]", replacement = " ")
  ),
  txt
)

# Nouns per chat line, then flattened; keep those with two or more characters.
noun2 <- sapply(txt, extractNoun, USE.NAMES = FALSE)
noun2
nounVec2 <- unlist(noun2)
nounVec2 <- nounVec2[which(nchar(nounVec2) >= 2)]

# Noun frequencies; show the 100 most frequent.
nounFrame2 <- data.frame(nounVec2)
nounFreq2 <- table(nounFrame2)
head(sort(nounFreq2, decreasing = TRUE), 100)

# Word cloud of up to 100 nouns; min.freq = 1 admits every noun.
pal <- brewer.pal(3, "Set2")
wordcloud(
  names(nounFreq2),
  freq = nounFreq2,
  scale = c(5.5, 1),
  rot.per = 0.5,
  min.freq = 1,
  random.order = FALSE,
  random.color = TRUE,
  colors = pal,
  max.words = 100
)



# Install tm / stringr only when they are missing, then load them.
if (!requireNamespace("tm", quietly = TRUE)) install.packages("tm")
library(tm)
if (!requireNamespace("stringr", quietly = TRUE)) install.packages("stringr")
library(stringr)

# Term-document matrix over the per-tweet noun lists in `noun`.
# tm drops terms shorter than three characters by default, which would
# discard two-character Korean nouns; lower the bound to 2 so this matches
# the nchar >= 2 filter used for the word clouds.
cps <- Corpus(VectorSource(noun))
tdm <- TermDocumentMatrix(cps, control = list(wordLengths = c(2, Inf)))
tdm.matrix <- as.matrix(tdm)

# Rank terms by total count and keep the top 100. head() copes with fewer
# than 100 terms (1:100 would index with NA), and drop = FALSE keeps a matrix
# even when a single term is left.
word.count <- rowSums(tdm.matrix)
word.order <- order(word.count, decreasing = TRUE)
freq.word <- tdm.matrix[head(word.order, 100), , drop = FALSE]
rownames(freq.word)

# Term-by-term co-occurrence matrix: products of the per-document counts of
# each pair of terms, summed over documents.
co.matrix <- freq.word %*% t(freq.word)

library(qgraph)

# Spring-layout network of the co-occurrence matrix; node size grows with the
# log of each term's diagonal entry.
qg <- qgraph(co.matrix, labels = rownames(co.matrix), diag = FALSE,
             layout = "spring", edge.color = "black",
             vsize = log(diag(co.matrix)) * 1.5)
plot(qg)