공유자료 HOME > 자료실 > 공유자료
 
[정보기술활용연구] 감성분석 R 코드
관리자 16-04-05 10:14 2,641
   movies.csv (181byte) [216] DATE : 2016-04-05 10:14:42
# Packages this script attaches. Only the ones that are not yet installed
# are downloaded, so re-running the script does not reinstall everything.
required_pkgs <- c("twitteR", "plyr", "stringr", "ggplot2")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach the packages (the original first line read `ibrary(twitteR)`,
# which fails with "could not find function").
library(twitteR)
library(plyr)
library(stringr)
library(ggplot2)
# setwd( // path to your own working folder // )
# Note kept from the original script: the order of the columns in the CSV
# matters for the smooth-spline method to work properly (that method is not
# part of this section).
#
# `path` is the directory prefix used to locate the lexicon files. Both
# strings below are placeholders (Korean for "location of the sentiment
# lexicon and the csv file + '/'"); replace them with a real directory that
# ends in "/". The first one is used when the login name is 'obkwon'.
path <- if (Sys.info()["user"] == "obkwon") {
  "// 감성어휘사전과 csv 파일이 있는 위치 + '/'"
} else {
  "  // 감성어휘사전과 csv 파일이 있는 위치 + '/'"
}
 
 # function score.sentiment
 score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
 {
   # Parameters
   # sentences: vector of text to score
   # pos.words: vector of words of postive sentiment
   # neg.words: vector of words of negative sentiment
   # .progress: passed to lapply() to control of progress bar
   
   # create simple array of scores with laply
   scores = lapply(sentences,
                  function(sentence, pos.words, neg.words)
                  {
                    # remove punctuation
                    sentence = gsub("[[:punct:]]", "", sentence)
                    # remove control characters
                    sentence = gsub("[[:cntrl:]]", "", sentence)
                    # remove digits?
                    sentence = gsub('\\d+', '', sentence)
                    # remove html links
                    sentence = gsub("http\\w+", "", sentence)
                    # remove unnecessary spaces
                    sentence = gsub("[ \t]{2,}", "", sentence)
                    sentence = gsub("^\\s+|\\s+$", "", sentence)
                    
                    # define error handling function when trying tolower
                    tryTolower = function(x)
                    {
                      # create missing value
                      y = NA
                      # tryCatch error
                      try_error = tryCatch(tolower(x), error=function(e) e)
                      # if not an error
                      if (!inherits(try_error, "error"))
                        y = tolower(x)
                      # result
                      return(y)
                    }
                    # use tryTolower with sapply 
                    sentence = sapply(sentence, tryTolower)
                    
                    # split sentence into words with str_split (stringr package)
                    word.list = str_split(sentence, "\\s+")
                    words = unlist(word.list)
                    
                    # compare words to the dictionaries of positive & negative terms
                    pos.matches = match(words, pos.words)
                    neg.matches = match(words, neg.words)
                    
                    # get the position of the matched term or NA
                    # we just want a TRUE/FALSE
                    pos.matches = !is.na(pos.matches)
                    neg.matches = !is.na(neg.matches)
                    
                   # final score
                    score = sum(pos.matches) - sum(neg.matches)
                    return(score)
                  }, pos.words, neg.words, .progress=.progress )
   
   # data frame with scores for each sentence
   scores.df = data.frame(text=sentences, score=scores)
   return(scores.df)
 }
 
 
 # import positive and negative words
 posText = paste(path,"positive-words.txt", sep = "")
 negText = paste(path,"negative-words.txt", sep = "")
 # import positive and negative words
 pos.words = readLines(posText)
 neg.words = readLines(negText)
 
 movies <- read.csv("movies.csv", header=T)
## movies <- data.frame(name=c("영화1", "영화2"), review=c("You're awesome and I love you.",  "I hate and hate and hate, So angry!"), score=c(0,0))
 (results <- score.sentiment(movies$review, pos.words, neg.words))
 movies$score <- results$score
 movies