# expl.R 2.2KB

  # Exploration on data
  # Export images 700x400

  # Libraries ----
  library(ggplot2)
  library(dplyr)
  library(RColorBrewer)
  library(scales)

  # Load data from Rdata file ----
  # load() returns (invisibly) the names of the objects it restored. Checking
  # that list makes the script stop with a clear message when the file does
  # not provide `dat`, instead of silently reusing a stale `dat` from the
  # workspace or failing later with "object 'dat' not found".
  loaded_objects <- load("crashdata.Rdata")
  if (!"dat" %in% loaded_objects) {
    stop("crashdata.Rdata did not provide an object named 'dat'", call. = FALSE)
  }
  # Alternative way to load data
  # dat <- read.csv("finaldata_201809.csv")

  # Relevel crash severity ----
  # Fix the level order to N, M, S, F. factor() turns any value outside
  # `levels` into NA without complaint, and table() then drops those rows, so
  # warn when the data contains a code that is about to be lost.
  severity_levels <- c("N", "M", "S", "F")
  unexpected_codes <- setdiff(
    unique(as.character(dat$CRASH_SEV)),
    c(severity_levels, NA)
  )
  if (length(unexpected_codes) > 0) {
    warning(
      "CRASH_SEV codes converted to NA: ",
      paste(unexpected_codes, collapse = ", "),
      call. = FALSE
    )
  }
  dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = severity_levels)
  str(dat)
  # Crashes per calendar year ----
  yeartab <- table(dat$CRASH_YEAR)
  # Average number of crashes per year. The divisor 18.75 is a hard-coded
  # count of years (presumably the last year is only partly covered -- TODO
  # confirm against the data extract).
  mpy <- sum(yeartab) / 18.75

  # Obviously, data missing from 2018
  # Outlined bars of the yearly counts, with the average as a dashed line.
  ggplot(data = dat) +
    geom_bar(mapping = aes(x = CRASH_YEAR), colour = "black", fill = NA) +
    geom_hline(yintercept = mpy, linetype = "dashed") +
    labs(x = "Year", y = "Number of crashes") +
    theme_classic()

  # Counts by CRASH_FIN_YEAR
  table(dat$CRASH_FIN_YEAR)
  # Severity vs. fatality count ----
  # Cross-tabulate to check that severity is given as F if and only if the
  # number of fatalities is nonzero.
  table(dat$CRASH_SEV, dat$FATAL_COUNT)

  # Number of vehicles involved ----
  levels(dat$MULTI_VEH)
  # Computed once here; the same table is rendered with kable() further down.
  veh_by_sev <- table(dat$MULTI_VEH, dat$CRASH_SEV)
  veh_by_sev

  # Open road vs vehicles, split by whether the crash was fatal
  vehtab <- table(dat$MULTI_VEH, dat$URBAN, dat$CRASH_SEV == "F")
  vehtab

  # Graph crash severity ----
  # Outlined bars, y axis formatted with thousands separators.
  ggplot(data = dat) +
    geom_bar(mapping = aes(x = CRASH_SEV), colour = "black", fill = NA) +
    scale_y_continuous(labels = scales::comma) +
    labs(x = "Crash severity", y = "Number of crashes") +
    theme_classic()
  knitr::kable(veh_by_sev)

  # Not all openroad crashes are on state highways
  table(dat$URBAN, dat$CRASH_SH_DESC)

  # Motorcycle crash severity (TRUE column: MOTOR_CYCLE greater than zero)
  table(dat$CRASH_SEV, dat$MOTOR_CYCLE > 0)
  # Open-road state-highway crashes ----
  # Restrict to crashes on the open road on state highways, then take the
  # fatal subset of those.
  rurhwy <- dat %>%
    filter(URBAN == "Openroad", CRASH_SH_DESC == "Yes")
  frurhwy <- rurhwy %>%
    filter(CRASH_SEV == "F")
  str(frurhwy)
  knitr::kable(table(rurhwy$MULTI_VEH, rurhwy$CRASH_SEV))

  # Severity counts, single-vehicle crashes vs. everything else.
  # Labels are keyed by break value rather than by position: discrete scales
  # drop unused levels, so positional labels would error or shift onto the
  # wrong bar if the subset lacked a severity level or a vehicle group.
  # The fill aesthetic is the logical `MULTI_VEH != "Single vehicle"`, hence
  # the "FALSE"/"TRUE" keys.
  # NOTE(review): M/S are labelled "Moderate"/"Severe" as in the original;
  # confirm these match the data dictionary for CRASH_SEV.
  severity_labels <- c(
    "N" = "Not",
    "M" = "Moderate",
    "S" = "Severe",
    "F" = "Fatal"
  )
  vehicle_labels <- c(
    "FALSE" = "Single vehicle",
    "TRUE" = "All other categories"
  )
  ggplot(rurhwy) +
    geom_bar(
      aes(fill = MULTI_VEH != "Single vehicle", x = CRASH_SEV),
      position = "dodge"
    ) +
    scale_fill_brewer(
      "Vehicles involved",
      type = "qual",
      palette = "Dark2",
      labels = vehicle_labels
    ) +
    scale_x_discrete("Severity", labels = severity_labels) +
    theme_classic() +
    theme(legend.position = "right")

  # Severity (rows) by vehicle category (columns) as a data frame of counts.
  rurtype <- as.data.frame.matrix(table(rurhwy$CRASH_SEV, rurhwy$MULTI_VEH))
  # Dividing a data frame by a vector recycles the vector down each column, so
  # every row is divided by its own total: row-wise fractions per severity.
  # A severity with no crashes has a zero total and yields NaN.
  rurtypefrac <- rurtype / rowSums(rurtype)
  rurtypefrac["Single vehicle"]
  # Overall share of each vehicle category across all severities
  colSums(rurtype) / sum(rurtype)