# Exploration on data
# Export images 700x400

# Libraries ----
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)

# Data ----
# Load data from Rdata file; the rest of the script expects it to provide the
# data frame `dat`.
load("crashdata.Rdata")
# Alternative way to load data
#dat <- read.csv("finaldata_201809.csv")

# Relevel crash severity so the factor levels run N, M, S, F
dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = c("N", "M", "S", "F"))
str(dat)

# Crashes per year ----
yeartab <- table(dat$CRASH_YEAR)
# Total crash count divided by 18.75; presumably 18.75 is the number of years
# the data set covers -- TODO confirm.
mpy <- sum(yeartab) / 18.75

# Obviously, data missing from 2018
# Bar chart of crashes per year, with `mpy` drawn as a dashed reference line.
ggplot(dat, aes(x = CRASH_YEAR)) +
  geom_bar(fill = NA, colour = "black") +
  geom_hline(yintercept = mpy, linetype = "dashed") +
  theme_classic() +
  labs(x = "Year", y = "Number of crashes")

table(dat$CRASH_FIN_YEAR)

# Severity cross-checks ----
# Check that severity is given as F if and only if nonzero number of fatalities
table(dat$CRASH_SEV, dat$FATAL_COUNT)

# Look at number of vehicles involved
levels(dat$MULTI_VEH)
table(dat$MULTI_VEH, dat$CRASH_SEV)

# Open road vs vehicles, split by whether the crash severity is "F"
vehtab <- table(dat$MULTI_VEH, dat$URBAN, dat$CRASH_SEV == "F")
vehtab

# Graph crash severity
ggplot(dat, aes(x = CRASH_SEV)) +
  geom_bar(fill = NA, colour = "black") +
  theme_classic() +
  labs(x = "Crash severity", y = "Number of crashes") +
  scale_y_continuous(labels = scales::comma)

knitr::kable(table(dat$MULTI_VEH, dat$CRASH_SEV))

# Not all openroad crashes are on state highways
table(dat$URBAN, dat$CRASH_SH_DESC)

# Motorcycle crash severity
table(dat$CRASH_SEV, dat$MOTOR_CYCLE > 0)

# Open-road state-highway crashes ----
# Just look at fatal crashes on the open road on state highways
rurhwy <- filter(dat, URBAN == "Openroad", CRASH_SH_DESC == "Yes")
frurhwy <- filter(rurhwy, CRASH_SEV == "F")
str(frurhwy)

knitr::kable(table(rurhwy$MULTI_VEH, rurhwy$CRASH_SEV))

# Dodged bars per severity level. The fill is the logical
# `MULTI_VEH != "Single vehicle"`, so FALSE (single vehicle) takes the first
# legend label and TRUE (everything else) the second.
ggplot(rurhwy, aes(x = CRASH_SEV, fill = MULTI_VEH != "Single vehicle")) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(
    name = "Vehicles involved",
    type = "qual",
    palette = "Dark2",
    labels = c("Single vehicle", "All other categories")
  ) +
  scale_x_discrete(
    name = "Severity",
    labels = c("Not", "Moderate", "Severe", "Fatal")
  ) +
  theme_classic() +
  theme(legend.position = "right")

# Severity (rows) by vehicle-involvement category (columns) as a data frame
rurtype <- as.data.frame.matrix(table(rurhwy$CRASH_SEV, rurhwy$MULTI_VEH))

# Divide each row by its total, giving within-severity proportions
rurtypefrac <- rurtype / rowSums(rurtype)
rurtypefrac["Single vehicle"]

# Share of each vehicle-involvement category across all severities
colSums(rurtype) / sum(rurtype)