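## Shared Materials -- HOME > Resources > Shared Materials
 
## [Useful R coding] Code that saves news article titles from Naver for a set of search terms
## Administrator 19-10-02 09:59 2,058
## Author: Yeom Chang-hun (염창훈; Kyung Hee University, Department of Business Administration, CAITECH)
## Posted: October 2, 2019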
 
library(RSelenium)
library(rvest)
library(httr)
library(stringr)
### Load the libraries needed for crawling
 
# Describe the Selenium server this script drives: a Chrome browser reachable
# on localhost, fixed to port 4445.
remDr <- remoteDriver(
  browserName = "chrome",
  remoteServerAddr = "localhost",
  port = 4445L
)

# Open the browser session (Chrome window) on that server.
remDr$open()

# Keywords to search Naver news for. The literals had been corrupted into
# mojibake (EUC-KR text decoded as Latin-1); they are restored here to the
# Korean terms for: virtual currency, Bitcoin, Ethereum, Libra, plus "ICO".
hhd <- c("가상화폐", "비트코인", "이더리움", "리브라", "ICO")

# First and last day (inclusive) of the date range to search.
a <- as.Date("2019-09-18")
b <- as.Date("2019-09-24")

# One Date per day in the range. The variable is called `c` because the crawl
# loop reads it under that name; calls such as c(1, 2) still resolve to the
# base function, since R skips non-function bindings when looking up a call.
c <- seq(a, b, by = "day")

# Values passed as the `start` URL parameter for each keyword: 1, 11, ..., 4001.
# Presumably one value per 10-item result page -- confirm against the site.
ff <- 1
fff <- 4001
ffd <- seq(ff, fff, by = 10)

# Empty data frame that the crawl loop appends its results to.
tototal <- data.frame()

# Crawl Naver news search results for every keyword in `hhd` and every day in
# `c`, then write one <keyword>.Rdata and one <keyword>.csv file per keyword.
#
# Reads from the enclosing script:
#   remDr - an opened RSelenium remote driver
#   hhd   - character vector of search keywords
#   c     - vector of days to search (each day is queried on its own)
#   ffd   - values for the `start` URL parameter, tried in order
#
# Each saved table has the columns `titles`, `pub` and `dates`.
for (q in seq_along(hhd)) {
  keyword <- hhd[q]

  # Percent-encode the keyword so that spaces, '&' or non-ASCII text cannot
  # corrupt the query string.
  query <- utils::URLencode(enc2utf8(keyword), reserved = TRUE)

  # Fresh accumulator for every keyword. Previously `tototal` was created once
  # before this loop and removed by rm() after the first keyword, so the
  # second keyword stopped with "object 'tototal' not found". Pages are
  # collected in a list and bound once, instead of rbind()-ing inside the loop.
  page_frames <- list()

  for (i in seq_along(c)) {
    # The URL carries the day as yyyy.mm.dd in both `ds` and `de`.
    day <- str_replace_all(as.character(c[i]), "-", ".")

    for (j in seq_along(ffd)) {
      # Result page for this keyword / day / start offset.
      remDr$navigate(paste0(
        "https://search.naver.com/search.naver?where=news&query=", query,
        "&sm=tab_opt&sort=0&photo=0&field=1&reporter_article=&pd=3",
        "&ds=", day, "&de=", day, "&start=", ffd[j]
      ))

      # Parse the HTML the browser currently shows.
      html <- read_html(remDr$getPageSource()[[1]])

      # A "not found" notice means there are no results here: go to next day.
      not_found <- html %>%
        html_nodes(
          "div#wrap div#container div#content div#main_pack div#notfound p"
        )
      if (length(not_found) != 0) {
        break
      }

      # Every <li> of the first result list, then title / publisher / date.
      items <- html %>%
        html_node("ul.type01") %>%
        html_nodes("li")

      titles <- items %>%
        html_nodes("dt") %>%
        html_nodes("a") %>%
        html_text()

      pub <- items %>%
        html_nodes("dl > dd > span._sp_each_source") %>%
        html_text()

      dates <- items %>%
        html_nodes("dl > dd.txt_inline") %>%
        html_text() %>%
        str_extract_all("\\d+\\.\\d+\\.\\d+\\.") %>%
        unlist()

      # When an absolute date could not be extracted for every title, fall
      # back to the day that was searched.
      if (length(dates) != length(titles)) {
        dates <- rep(day, length(titles))
      }

      # data.frame() would fail (or silently recycle) if the publisher count
      # differs from the title count; record NA instead and flag the page.
      if (length(pub) != length(titles)) {
        warning(
          "Publisher count does not match title count for '", keyword,
          "' on ", day, " (start = ", ffd[j], "); storing NA.",
          call. = FALSE
        )
        pub <- rep(NA_character_, length(titles))
      }

      # Keep text columns as character regardless of the R version in use.
      page_frames[[length(page_frames) + 1L]] <- data.frame(
        titles, pub, dates,
        stringsAsFactors = FALSE
      )

      # No "next" link in the pager means this was the last page of the day.
      next_link <- html %>% html_nodes("div.paging > a.next")
      if (length(next_link) == 0) {
        break
      }
    }
  }

  # Bind everything collected for this keyword; stay an empty data frame when
  # nothing was found.
  tototal <- if (length(page_frames) > 0) {
    do.call(rbind, page_frames)
  } else {
    data.frame()
  }

  # Save under the object name `tototal`, write the CSV, then drop the object.
  save(tototal, file = paste0(keyword, ".Rdata"))
  write.csv(tototal, paste0(keyword, ".csv"), row.names = FALSE)
  rm(tototal)
}