## ----setup, include=FALSE------------------------------------------------
## Set the default chunk option `echo` to TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)


## ------------------------------------------------------------------------
## Download the raw homicide records; each line read becomes one element.
con <- url("https://github.com/jlnewton87/Programming/raw/master/R/homicides.txt")
homicides <- readLines(con)
## readLines() opens an unopened connection only for the duration of the call
## and closes it again, but does not destroy it. close() releases the
## connection explicitly instead of leaving it in the connection table.
close(con)

## Total number of events recorded
length(homicides)
homicides[1]
homicides[1000]


## ------------------------------------------------------------------------
## Indices of the records containing the literal text "iconHomicideShooting".
g <- grep(pattern = "iconHomicideShooting", x = homicides)
length(g)


## ------------------------------------------------------------------------
## Same search, also accepting the alternative "icon_homicide_shooting".
g <- grep(
  pattern = "iconHomicideShooting|icon_homicide_shooting",
  x = homicides
)
length(g)


## ------------------------------------------------------------------------
## Search on the literal text "Cause: shooting" instead.
g <- grep(pattern = "Cause: shooting", x = homicides)
length(g)


## ------------------------------------------------------------------------
## Any occurrence of "Shooting" or "shooting" anywhere in the record.
g <- grep(pattern = "[Ss]hooting", x = homicides)
length(g)


## ------------------------------------------------------------------------
## Records matched by the narrower "cause: shooting" pattern
## (either letter case for the leading C and S).
i <- grep(pattern = "[cC]ause: [Ss]hooting", x = homicides)
str(i)


## ------------------------------------------------------------------------
## Records matched by the broader pattern: "shooting" anywhere in the line.
j <- grep(pattern = "[Ss]hooting", x = homicides)
str(j)


## ------------------------------------------------------------------------
## Indices found by one pattern but not by the other, in both directions.
setdiff(x = i, y = j)
setdiff(x = j, y = i)


## ------------------------------------------------------------------------
## NOTE(review): 859 is presumably an index reported by setdiff(j, i)
## above -- confirm against the printed output.
homicides[859]


## ------------------------------------------------------------------------
## Positions of the state names that begin with "New".
grep(pattern = "^New", x = state.name)


## ------------------------------------------------------------------------
## The matching names themselves rather than their positions.
grep(pattern = "^New", x = state.name, value = TRUE)


## ------------------------------------------------------------------------
## Logical mask with one element per state, usable directly as a subscript.
g <- grepl(pattern = "^New", x = state.name)
g
state.name[g]


## ------------------------------------------------------------------------
## Print two sample records in full.
homicides[1]


## ------------------------------------------------------------------------
homicides[954]


## ------------------------------------------------------------------------
## Greedy match: (.*) extends to the last "</dd>" on the line.
## The class is written [Ff] because "|" is a literal character inside
## brackets, so the former [F|f] would also have accepted "|ound".
regexpr("<dd>[Ff]ound(.*)</dd>", homicides[1:10])


## ------------------------------------------------------------------------
## NOTE(review): 177 and 93 are presumably the match start and match.length
## printed for record 1 by the greedy regexpr() call -- confirm.
substr(homicides[1], 177, 177 + 93 - 1)


## ------------------------------------------------------------------------
## Non-greedy match: (.*?) stops at the first "</dd>" that follows.
regexpr("<dd>[Ff]ound(.*?)</dd>", homicides[1:10])


## ------------------------------------------------------------------------
## NOTE(review): 33 is presumably the match.length printed for record 1 by
## the non-greedy regexpr() call -- confirm.
substr(homicides[1], 177, 177 + 33 - 1)


## ------------------------------------------------------------------------
## regmatches() extracts the matched text using the positions held in `h`.
h <- regexpr("<dd>[Ff]ound(.*?)</dd>", homicides[1:5])
regmatches(homicides[1:5], h)


## ------------------------------------------------------------------------
substr(homicides[1], 177, 177 + 33 - 1)


## ------------------------------------------------------------------------
## Keep the same 33-character slice of record 1 for the examples that follow.
h1 <- substr(homicides[1], 177, 177 + 33 - 1)
## Strip a fixed-width prefix and suffix from each string in `x`.
##
## Arguments:
##   x      character vector, e.g. "<dd>Found on January 1, 2007</dd>".
##   first  position of the first character to keep. The default 14 skips
##          the 13-character prefix "<dd>Found on ".
##   drop   number of trailing characters to discard. The default 5 removes
##          the closing "</dd>".
##
## Returns a character vector with one trimmed string per element of `x`.
## Called with `x` alone it behaves exactly as the original fixed-offset
## version did.
my.substring <- function(x, first = 14L, drop = 5L) {
  last <- nchar(x) - drop
  substring(x, first, last)
}
my.substring(h1)


## ------------------------------------------------------------------------
g <- regmatches(homicides[1:5], h)
## vapply() guarantees one character string per element; sapply() would
## silently return a list if `g` were empty.
sg <- vapply(g, my.substring, character(1))
## as.character() drops the names that vapply() attaches from `g`.
as.character(sg)


## ------------------------------------------------------------------------
## sub() replaces only the first match of the pattern.
## The class is [Ff] rather than [F|f]: inside brackets "|" is a literal
## character, not alternation.
sub("<dd>[Ff]ound on |</dd>", "", h1)


## ------------------------------------------------------------------------
## gsub() replaces every match, so both alternatives are removed.
gsub("<dd>[Ff]ound on |</dd>", "", h1)


## ------------------------------------------------------------------------
## Locate the field in the first five records, extract it, then strip the
## surrounding markup in one vectorised gsub() call.
h <- regexpr("<dd>[Ff]ound(.*?)</dd>", homicides[1:5])
g <- regmatches(homicides[1:5], h)
sg <- gsub("<dd>[Ff]ound on |</dd>", "", g)
sg


## ----results="hide"------------------------------------------------------
## %B in as.Date() reads month names in the current LC_TIME locale. The "C"
## locale supplies English month names and is available on every platform,
## whereas "en_US.UTF-8" is missing on some systems (Sys.setlocale() then
## only warns and leaves the locale unchanged).
Sys.setlocale("LC_TIME", "C")

## ------------------------------------------------------------------------
as.Date(sg, format = "%B %d, %Y")


## ------------------------------------------------------------------------
## regexec() reports the whole match and each parenthesised sub-expression.
## The class is [Ff] rather than [F|f]: inside brackets "|" is a literal
## character, not alternation.
regexec("<dd>[Ff]ound on (.*?)</dd>", homicides[1])


## ------------------------------------------------------------------------
## regexpr() with the same pattern reports the whole match only.
regexpr("<dd>[Ff]ound on (.*?)</dd>", homicides[1])


## ------------------------------------------------------------------------
## Without parentheses there is no sub-expression for regexec() to report.
regexec("<dd>[Ff]ound on .*?</dd>", homicides[1])


## ------------------------------------------------------------------------
regexec("<dd>[Ff]ound on (.*?)</dd>", homicides[1])


## ------------------------------------------------------------------------
## NOTE(review): 177/33 and 190/15 are presumably the start/length pairs that
## regexec() printed for the whole match and for the captured group in
## record 1 -- confirm.
substr(homicides[1], 177, 177 + 33 - 1)


## ------------------------------------------------------------------------
substr(homicides[1], 190, 190 + 15 - 1)


## ------------------------------------------------------------------------
## With regexec() positions, regmatches() returns one character vector per
## record: the whole match followed by the captured group.
r <- regexec("<dd>[Ff]ound on (.*?)</dd>", homicides[1:2])
regmatches(homicides[1:2], r)


## ------------------------------------------------------------------------
## Apply the pattern to every record. The class is [Ff] rather than [F|f]:
## inside brackets "|" is a literal character, not alternation.
r <- regexec("<dd>[Ff]ound on (.*?)</dd>", homicides)
m <- regmatches(homicides, r)


## ------------------------------------------------------------------------
## Element 2 of each result is the captured group (element 1 is the whole
## match). Records without a match give character(0), so x[2] is NA.
## vapply() pins the result to a character vector even when `m` is empty.
dates <- vapply(m, function(x) x[2], character(1))


## ------------------------------------------------------------------------
## Month names (%B) are parsed according to the current LC_TIME locale.
dates <- as.Date(dates, format = "%B %d, %Y")


## ------------------------------------------------------------------------
hist(dates, breaks = "month", freq = TRUE,
     main = "Monthly Homicides in Baltimore")


## ----results="hide"------------------------------------------------------
## Return LC_TIME to the session default taken from the environment ("")
## rather than forcing "ca_ES.UTF-8", which is machine-specific and only
## produces a warning on systems where that locale is not installed.
Sys.setlocale("LC_TIME", "")

