# Exploration on data
# Export images at 700x400

# Libraries
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(scales)

# Load data from the Rdata file. Everything below reads a data frame called
# `dat`, so fail immediately if the file does not supply one instead of
# silently picking up a stale `dat` left in the workspace.
loaded_objects <- load("crashdata.Rdata")
if (!("dat" %in% loaded_objects)) {
  stop("crashdata.Rdata does not contain an object named 'dat'", call. = FALSE)
}

# Alternative way to load data
# NOTE(review): later code calls levels(dat$MULTI_VEH); with R >= 4.0
# read.csv() no longer creates factors by default -- confirm before using.
#dat <- read.csv("finaldata_201809.csv")

# Relevel crash severity so tables and plots use the order N, M, S, F.
# factor() turns any value outside `levels` into NA without complaint, so
# report unexpected codes before they disappear.
sev_levels <- c("N", "M", "S", "F")
unknown_sev <- setdiff(unique(as.character(dat$CRASH_SEV)), c(sev_levels, NA))
if (length(unknown_sev) > 0) {
  warning(
    "CRASH_SEV values outside N/M/S/F will become NA: ",
    paste(unknown_sev, collapse = ", "),
    call. = FALSE
  )
}
dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = sev_levels)

str(dat)

# Crash counts per calendar year, plus the overall total divided by 18.75
# as a per-year reference level.
# NOTE(review): 18.75 looks like the number of years the data spans
# (18 full years plus three quarters of 2018) -- confirm.
yeartab <- table(dat[["CRASH_YEAR"]])
mpy <- sum(yeartab) / 18.75

# Obviously, data is missing from 2018 (dashed line = reference level above)
ggplot(dat, aes(x = CRASH_YEAR)) +
  geom_bar(fill = NA, colour = "black") +
  geom_hline(yintercept = mpy, linetype = "dashed") +
  theme_classic() +
  xlab("Year") +
  ylab("Number of crashes")

# Same kind of count, by the CRASH_FIN_YEAR column
table(dat$CRASH_FIN_YEAR)

# Cross-tabulate severity against the fatality count, to check by eye that
# severity is F if and only if the number of fatalities is nonzero
table(dat[["CRASH_SEV"]], dat$FATAL_COUNT)

# Vehicle-involvement categories, and how they break down by severity
levels(dat[["MULTI_VEH"]])
table(dat[["MULTI_VEH"]], dat[["CRASH_SEV"]])

# Vehicle involvement by URBAN category, split on whether the crash was
# fatal (third dimension: FALSE / TRUE)
vehtab <- table(
  dat[["MULTI_VEH"]],
  dat[["URBAN"]],
  dat[["CRASH_SEV"]] == "F"
)
vehtab

# Bar chart of the number of crashes at each severity level, with
# comma-formatted counts on the y axis
ggplot(dat, aes(x = CRASH_SEV)) +
  geom_bar(fill = NA, colour = "black") +
  scale_y_continuous(labels = scales::comma) +
  theme_classic() +
  xlab("Crash severity") +
  ylab("Number of crashes")

# Vehicle involvement by severity, rendered as a formatted table
table(dat[["MULTI_VEH"]], dat[["CRASH_SEV"]]) %>%
  knitr::kable()

# URBAN category against the state-highway flag: not every open-road crash
# is on a state highway
table(dat[["URBAN"]], dat[["CRASH_SH_DESC"]])

# Severity, split on whether MOTOR_CYCLE is greater than zero
table(dat[["CRASH_SEV"]], dat$MOTOR_CYCLE > 0)

# Narrow down to open-road crashes on state highways (rurhwy), and from
# those keep only the fatal ones (frurhwy)
rurhwy <- dplyr::filter(dat, URBAN == "Openroad" & CRASH_SH_DESC == "Yes")
frurhwy <- dplyr::filter(rurhwy, CRASH_SEV == "F")

str(frurhwy)

# Vehicle involvement by severity within the open-road highway subset
table(rurhwy[["MULTI_VEH"]], rurhwy[["CRASH_SEV"]]) %>%
  knitr::kable()

# Severity of open-road state-highway crashes, with dodged bars comparing
# single-vehicle crashes against every other MULTI_VEH category.
#
# Labels are given as named vectors so each one is matched to its break by
# name. Positional labels would be mislabelled (or rejected) if a severity
# level or one of the two fill groups were absent from `rurhwy`, or if the
# factor level order changed.
ggplot(rurhwy) +
  geom_bar(
    # The fill aesthetic is a logical: FALSE = single vehicle, TRUE = other
    aes(x = CRASH_SEV, fill = MULTI_VEH != "Single vehicle"),
    position = "dodge"
  ) +
  scale_fill_brewer(
    "Vehicles involved",
    type = "qual",
    palette = "Dark2",
    labels = c("FALSE" = "Single vehicle", "TRUE" = "All other categories")
  ) +
  scale_x_discrete(
    "Severity",
    labels = c(N = "Not", M = "Moderate", S = "Severe", F = "Fatal")
  ) +
  theme_classic() +
  theme(legend.position = "right")

# Counts for the open-road highway subset as a data frame:
# one row per severity level, one column per vehicle-involvement category
rurtype <- table(rurhwy[["CRASH_SEV"]], rurhwy[["MULTI_VEH"]]) %>%
  as.data.frame.matrix()

# Divide every row by its own total, giving the share of each category
# within a severity level; show the single-vehicle share
rurtypefrac <- rurtype / rowSums(rurtype)
rurtypefrac[, "Single vehicle", drop = FALSE]

# Share of each vehicle-involvement category across all severities
prop.table(colSums(rurtype))