#Title: Simple Web Scraper
#From Sharon Machlis
#From Ch. 4.8 in Machlis:
#‘Scrape’ data from Web pages with the rvest package 
#and SelectorGadget browser extension or JavaScript bookmarklet. 
#
#SelectorGadget helps you discover the CSS elements of data you want to copy 
#that are on an HTML page; then rvest uses R to find and save that data. 
#Instructions and a video: http://bit.ly/Rscraping
#RStudio webinar: https://www.rstudio.com/resources/webinars/extracting-data-from-the-web-part-2/


# ---- Setup -------------------------------------------------------------------
# p_load() installs any missing packages and attaches them. The explicit
# library() calls are redundant after it but harmless; they keep the script
# usable when run piece by piece.
pacman::p_load(rvest, robotstxt, dplyr, purrr)
library(robotstxt)
library(rvest)

# Browse the robotstxt documentation (meant for interactive exploration).
vignette(package = "robotstxt")
vignette("using_robotstxt")

# ---- Check that scraping is permitted ----------------------------------------
# Single source of truth for the page being scraped.
cheatsheets_url <- "https://www.rstudio.com/resources/cheatsheets/"

# Act on the robots.txt answer rather than discarding it: stop before
# fetching anything if the site does not allow this path.
if (!isTRUE(paths_allowed(cheatsheets_url))) {
  stop("robots.txt does not allow scraping ", cheatsheets_url, call. = FALSE)
}

# ---- Scrape the download buttons ---------------------------------------------
# CSS selector for the elements of interest (found with SelectorGadget).
my_css <- ".button-default"

my_html <- read_html(cheatsheets_url)

# Select the matching nodes once and reuse them below.
my_nodes <- html_nodes(my_html, my_css)

# Fail with a clear message if the page layout changed and nothing matches;
# otherwise `my_nodes[[1]]` dies with an opaque "subscript out of bounds".
if (length(my_nodes) == 0) {
  stop(
    "No nodes matched the CSS selector '", my_css, "' on ", cheatsheets_url,
    call. = FALSE
  )
}

my_nodes[[1]]

# Link target of every matched node; NA where a node has no href attribute.
my_urls <- my_nodes %>%
  html_attr("href")

my_urls[1]

# Visible text of every matched node.
my_nodes_text <- my_nodes %>%
  html_text()
my_nodes_text[1]

# ---- Download the linked files -----------------------------------------------
# basename() keeps only the last path component, i.e. the file name.
basename("https://github.com/rstudio/cheatsheets/raw/master/rmarkdown-2.0.pdf")

# basename() is vectorised, so no map_chr() is needed. The result stays
# parallel to `my_urls` (NA in, NA out).
my_filenames <- basename(my_urls)

# Skip nodes without an href: download.file(NA, NA) would abort the whole run.
has_url <- !is.na(my_urls)

# mode = "wb" writes the files as binary. The default text mode ("w")
# corrupts PDFs on Windows.
walk2(
  my_urls[has_url],
  my_filenames[has_url],
  function(url, dest) download.file(url, destfile = dest, mode = "wb")
)