# ---- Package setup and Twitter authentication ----
# Install the third-party packages this script relies on, but only those that
# are not already available, so re-running the script does not re-download
# them every time.
local({
  needed <- c("twitteR", "KoNLP", "wordcloud", "qgraph")
  available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  if (!all(available)) {
    install.packages(needed[!available])
  }
})
library("twitteR")

# API credentials are read from the environment instead of being stored in
# the source file. Set them e.g. in ~/.Renviron:
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET
# NOTE(review): the key/secret values that were previously committed in this
# file should be treated as leaked and revoked/regenerated.
consumerKey <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumerSecret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
accessToken <- Sys.getenv("TWITTER_ACCESS_TOKEN")
accessTokenSecret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() returns "" for unset variables; fail early with a clear message
# rather than sending empty credentials to the API.
if (!all(nzchar(c(consumerKey, consumerSecret,
                  accessToken, accessTokenSecret)))) {
  stop(
    "Twitter credentials are not set; define TWITTER_CONSUMER_KEY, ",
    "TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and ",
    "TWITTER_ACCESS_TOKEN_SECRET in the environment.",
    call. = FALSE
  )
}

setup_twitter_oauth(consumerKey, consumerSecret, accessToken,
                    accessTokenSecret)
# ---- Search Twitter for the keyword ----
# The keyword is the Korean word for "Overwatch". It is written with \u
# escapes (U+C624 U+BC84 U+C6CC U+CE58) so the literal does not depend on the
# encoding the source file is saved or read in; the previous literal was the
# same word with its bytes mis-decoded, which searched for garbage text.
keyword <- "\uc624\ubc84\uc6cc\uce58"
keyword <- enc2utf8(keyword)

# Request up to 10000 Korean-language tweets matching the keyword.
result <- searchTwitter(keyword, lang = "ko", n = 10000)
library(KoNLP)
library(wordcloud)
library(RColorBrewer)
# ---- Clean the raw tweet text ----
# Regular-expression fragments that are blanked out of every tweet. They are
# joined with "|" into a single alternation below.
stuff_to_remove <- c(
  "http[s]?://[[:alnum:].\\/]+", # URLs
  "@\\w*",                       # @mentions; \w is kept outside a bracket
                                 # expression, where its meaning is unambiguous
  "<.*>",                        # anything between angle brackets (greedy)
  "'s",
  "\\(", "\\)",
  "#", ":",
  "[\u3131-\u314e]"              # stand-alone Hangul consonants (U+3131-U+314E);
                                 # must be bracketed to be a range
)
stuff_to_remove <- paste(stuff_to_remove, collapse = "|")

# Pull the text out of each status object returned by the search. vapply()
# guarantees a character vector even when the search returned nothing.
result <- vapply(result, function(x) x$getText(), character(1))
result

result <- gsub(stuff_to_remove, " ", result)

# Keep only digits, ASCII letters and complete Hangul syllables
# (U+AC00-U+D7A3). "A-Za-z" is used instead of "A-z" because the latter also
# admits the punctuation between "Z" and "a" ([ \ ] ^ _ `). This single class
# therefore also removes "_", "#" and any remaining stand-alone consonants.
result <- gsub("[^0-9A-Za-z\uac00-\ud7a3]", " ", result)

# Drop retweet markers and URL scheme fragments left over from truncated links.
result <- gsub("RT", " ", result)
result <- gsub("https", " ", result)
result
# ---- Extract nouns from the tweets and draw a word cloud ----
# One character vector of nouns per tweet. lapply() always returns a list;
# sapply() would silently collapse to a matrix/vector whenever every tweet
# happened to yield the same number of nouns.
noun <- lapply(result, extractNoun)
noun

nounVec <- unlist(noun)

# Strip leftover markers first, THEN apply the length filter, so tokens that
# become empty or one character long after stripping are not counted.
nounVec <- gsub("RT", "", nounVec)
nounVec <- gsub("https", "", nounVec)
# which() drops NA comparisons, so missing tokens are discarded as well.
nounVec <- nounVec[which(nchar(nounVec) >= 2)]

# Frequency table of the remaining nouns.
nounFrame <- data.frame(nounVec)
nounFreq <- table(nounFrame)
head(sort(nounFreq, decreasing = TRUE), 100)

# Three-colour palette; words occurring fewer than 3 times are omitted and at
# most 100 words are drawn, most frequent first.
pal <- brewer.pal(3, "Set2")
wordcloud(
  names(nounFreq),
  freq = nounFreq,
  scale = c(5.5, 1),
  rot.per = 0.5,
  min.freq = 3,
  random.order = FALSE,
  random.color = TRUE,
  colors = pal,
  max.words = 100
)
# ---- Word cloud from an exported KakaoTalk chat log ----
# The log file is looked up relative to the working directory shown here.
getwd()
txt <- readLines("KakaoTalkChats3.txt", encoding = "UTF-8")
txt

# Remove the timestamp at the start of a message. The pattern is
#   <4-digit year>U+B144 <month>U+C6D4 <day>U+C77C <2 Hangul syllables> H:MM
# i.e. year/month/day suffix characters followed by a two-syllable marker and
# the clock time. The Hangul characters are written as \u escapes so the
# pattern does not depend on the source file's encoding; the previous literals
# were mis-decoded bytes and could not match the log.
txt <- gsub(
  paste0(
    "[0-9]{4}\ub144 [0-9]{1,2}\uc6d4 [0-9]{1,2}\uc77c ",
    "[\uac00-\ud7a3]{2} [0-9]{1,2}:[0-9]{1,2}"
  ),
  "",
  txt
)

# Remove the ", <sender> : " prefix that follows the timestamp.
txt <- gsub("[,][ ]\\S{1,}[ ][:][ ]", "", txt)

# Keep only digits and complete Hangul syllables (U+AC00-U+D7A3). This one
# class replaces the former three passes (stand-alone consonants, everything
# outside Hangul/digits/"A-z", then "A-z" itself), whose net effect was the
# same once the ranges are spelled correctly.
txt <- gsub("[^0-9\uac00-\ud7a3]", " ", txt)

# One character vector of nouns per line; lapply() always yields a list,
# whereas sapply() could collapse to a matrix for equal-length results.
noun2 <- lapply(txt, extractNoun)
noun2

nounVec2 <- unlist(noun2)
# Keep tokens of at least two characters; which() also drops NA tokens.
nounVec2 <- nounVec2[which(nchar(nounVec2) >= 2)]

nounFrame2 <- data.frame(nounVec2)
nounFreq2 <- table(nounFrame2)
head(sort(nounFreq2, decreasing = TRUE), 100)

# Same palette as the tweet cloud; every word (min.freq = 1) is eligible,
# capped at the 100 most frequent.
pal <- brewer.pal(3, "Set2")
wordcloud(
  names(nounFreq2),
  freq = nounFreq2,
  scale = c(5.5, 1),
  rot.per = 0.5,
  min.freq = 1,
  random.order = FALSE,
  random.color = TRUE,
  colors = pal,
  max.words = 100
)
# ---- Co-occurrence network of the most frequent tweet nouns ----
# Install tm / stringr only when they are missing, instead of on every run.
local({
  needed <- c("tm", "stringr")
  available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  if (!all(available)) {
    install.packages(needed[!available])
  }
})
library(tm)
library(stringr)

# Build a term-document matrix with one document per tweet's noun vector.
cps <- Corpus(VectorSource(noun))
tdm <- TermDocumentMatrix(cps)
tdm.matrix <- as.matrix(tdm)

# Rank terms by their total count across all documents.
word.count <- rowSums(tdm.matrix)
word.order <- order(word.count, decreasing = TRUE)

# Take at most the 100 most frequent terms. head() is used instead of
# word.order[1:100] so that a corpus with fewer than 100 terms does not
# produce NA subscripts, and drop = FALSE keeps a matrix even for one term.
top.idx <- head(word.order, 100)
freq.word <- tdm.matrix[top.idx, , drop = FALSE]
rownames(tdm.matrix)[top.idx]

# Term-by-term co-occurrence: freq.word %*% t(freq.word). The diagonal holds
# each term's sum of squared per-document counts.
co.matrix <- tcrossprod(freq.word)

library(qgraph)
# Spring-layout graph; node size grows with the log of the diagonal value and
# self-loops (the diagonal) are not drawn as edges.
qg <- qgraph(
  co.matrix,
  labels = rownames(co.matrix),
  diag = FALSE,
  layout = "spring",
  edge.color = "black",
  vsize = log(diag(co.matrix)) * 1.5
)
plot(qg)