# expl.R 1.9KB
  # Exploration on data
  # Libraries ----
  library(ggplot2)
  library(dplyr)
  library(RColorBrewer)

  # Load data from Rdata file ----
  # load() returns the names of the objects it restored. Check that `dat` is
  # one of them, so that a stale `dat` left over from an earlier session is
  # never analysed by accident.
  loaded_objects <- load("crashdata.Rdata")
  if (!"dat" %in% loaded_objects) {
    stop(
      "crashdata.Rdata does not contain an object named 'dat'",
      call. = FALSE
    )
  }
  # Alternative way to load data (replace the load() call and its check above)
  #dat <- read.csv("finaldata_201809.csv")

  # Relevel crash severity so the factor levels are in the order N, M, S, F.
  # Any value outside these four codes becomes NA.
  dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = c("N", "M", "S", "F"))
  str(dat)
  # Number of crashes per value of CRASH_YEAR, and the mean of those counts
  yeartab <- table(dat[["CRASH_YEAR"]])
  mpy <- mean(yeartab)
  # Bar chart of crashes per year; the dashed horizontal line marks the mean
  # count per year (mpy).
  # Obviously, data missing from 2018
  ggplot(dat, aes(x = CRASH_YEAR)) +
    geom_bar(fill = NA, colour = "black") +
    geom_hline(yintercept = mpy, linetype = "dashed") +
    labs(x = "Year", y = "Number of crashes") +
    theme_classic()
  # Counts per value of CRASH_FIN_YEAR (presumably the financial year)
  table(dat[["CRASH_FIN_YEAR"]])
  # Severity should be F if and only if the number of fatalities is nonzero;
  # cross-tabulate severity against the fatality count to check
  table(dat[["CRASH_SEV"]], dat[["FATAL_COUNT"]])

  # Number of vehicles involved: the categories, then counts by severity
  levels(dat[["MULTI_VEH"]])
  table(dat[["MULTI_VEH"]], dat[["CRASH_SEV"]])

  # Open road vs vehicles, split by whether the crash severity is F
  vehtab <- table(
    dat[["MULTI_VEH"]],
    dat[["URBAN"]],
    dat[["CRASH_SEV"]] == "F"
  )
  vehtab

  # Graph crash severity: one outlined bar per severity level
  ggplot(dat, aes(x = CRASH_SEV)) +
    geom_bar(fill = NA, colour = "black") +
    labs(x = "Crash severity", y = "Number of crashes") +
    theme_classic()

  # Not all openroad crashes are on state highways
  table(dat[["URBAN"]], dat[["CRASH_SH_DESC"]])

  # Motorcycle crash severity: severity against "MOTOR_CYCLE is above zero"
  table(dat[["CRASH_SEV"]], dat[["MOTOR_CYCLE"]] > 0)
  # Just look at crashes on the open road on state highways (rurhwy, all
  # severities) and the fatal subset of those (frurhwy)
  rurhwy <- dat %>% filter(URBAN == "Openroad", CRASH_SH_DESC == "Yes")
  frurhwy <- rurhwy %>% filter(CRASH_SEV == "F")
  str(frurhwy)
  knitr::kable(table(rurhwy$MULTI_VEH, rurhwy$CRASH_SEV))

  # Dodged bar chart of severity, coloured by single-vehicle vs everything else.
  # The fill aesthetic is the logical `MULTI_VEH != "Single vehicle"`, so FALSE
  # means single vehicle and TRUE means any other category.
  # Both label vectors are named by the value they describe, so each label
  # stays attached to the right category even if a severity level or one of
  # the two fill groups is absent from the subset.
  ggplot(rurhwy) +
    geom_bar(
      aes(x = CRASH_SEV, fill = MULTI_VEH != "Single vehicle"),
      position = "dodge"
    ) +
    scale_fill_brewer(
      "Vehicles involved",
      type = "qual",
      palette = "Dark2",
      labels = c("FALSE" = "Single vehicle", "TRUE" = "All other categories")
    ) +
    scale_x_discrete(
      "Severity",
      labels = c(
        "N" = "Not", "M" = "Moderate", "S" = "Severe", "F" = "Fatal"
      )
    ) +
    theme_classic() +
    theme(legend.position = "right")