데이터 과학에 기반한 경영 전문 연구인력 양성팀 > 공유자료

HOME > 자료실 > 공유자료

[정보기술활용연구] 감성분석 R 코드

관리자

16-04-05 10:14

4,428

movies.csv (181byte) [221] DATE : 2016-04-05 10:14:42

# Packages this script attaches below.
required_packages <- c("twitteR", "plyr", "stringr", "ggplot2")

# Install only the packages that are not available yet, so that re-running
# the script does not download everything again.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages (the first call used to be misspelled as `ibrary`,
# which aborted the script with "could not find function").
library(twitteR)
library(plyr)
library(stringr)
library(ggplot2)

# setwd( <path of your own working folder> )
# Note: the column order in the CSV matters for the smoot_spline method to
# work properly.
# `path` is the folder holding the sentiment lexicons and the csv file; the
# placeholder must be replaced by a real location that ends with '/'.
# The user name selects between two machine-specific locations.
path <- if (Sys.info()[["user"]] == "obkwon") {
  "// 감성어휘사전과 csv 파일이 있는 위치 + '/'"
} else {
  " // 감성어휘사전과 csv 파일이 있는 위치 + '/'"
}

# function score.sentiment
#
# Scores each text as (number of positive-lexicon words) minus
# (number of negative-lexicon words).
#
# Parameters
#   sentences: vector of text to score (character or factor)
#   pos.words: vector of words of positive sentiment
#   neg.words: vector of words of negative sentiment
#   .progress: accepted so that existing calls keep working; it is ignored
#              because the scoring loop uses base vapply(), which has no
#              progress bar
#
# Returns
#   data frame with one row per sentence and the columns `text` (the input
#   as given) and `score` (integer)
score.sentiment <- function(sentences, pos.words, neg.words, .progress = 'none')
{
  # Lower-case a string; yields NA instead of failing when tolower() raises
  # an error (e.g. on text with an invalid encoding).
  to_lower_or_na <- function(x) {
    tryCatch(tolower(x), error = function(e) NA_character_)
  }

  score_one <- function(sentence) {
    # remove punctuation
    sentence <- gsub("[[:punct:]]", "", sentence)
    # remove control characters
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    # remove digits
    sentence <- gsub("\\d+", "", sentence)
    # remove links (punctuation is already gone, so a URL is one word here)
    sentence <- gsub("http\\w+", "", sentence)
    # collapse runs of blanks into ONE space; deleting them outright would
    # glue neighbouring words together and hide lexicon matches
    sentence <- gsub("[ \t]{2,}", " ", sentence)
    # trim leading / trailing whitespace
    sentence <- gsub("^\\s+|\\s+$", "", sentence)

    sentence <- to_lower_or_na(sentence)

    # split the sentence into words
    words <- unlist(strsplit(sentence, "\\s+"))

    # match() gives the lexicon position or NA; only hit / no hit matters
    pos.hits <- !is.na(match(words, pos.words))
    neg.hits <- !is.na(match(words, neg.words))

    # final score
    sum(pos.hits) - sum(neg.hits)
  }

  # One integer per sentence. vapply() guarantees a plain vector, so `score`
  # becomes a single column; USE.NAMES = FALSE keeps the texts from turning
  # into row names of the result.
  scores <- vapply(as.character(sentences), score_one, integer(1),
                   USE.NAMES = FALSE)

  # data frame with scores for each sentence
  data.frame(text = sentences, score = scores)
}

# Build the file names of the two sentiment lexicons; `path` is expected to
# end with '/', so plain concatenation is enough.
posText <- paste0(path, "positive-words.txt")
negText <- paste0(path, "negative-words.txt")
# Read the lexicons; every line of a file becomes one entry of the vector.
pos.words <- readLines(posText)
neg.words <- readLines(negText)

# Read the reviews from the working directory; the `review` column is scored.
movies <- read.csv("movies.csv", header = TRUE)
## Inline alternative to the csv file:
## movies <- data.frame(name=c("movie1", "movie2"), review=c("You're awesome and I love you.", "I hate and hate and hate, So angry!"), score=c(0,0))
results <- score.sentiment(movies$review, pos.words, neg.words)
# Explicit print() so the output also appears when the file is source()d.
print(results)
# Store the computed score next to each review and show the final table.
movies$score <- results$score
print(movies)

경희대학교 BK21

전체메뉴