# expl.R (2.1KB)
  1. # Exploration on data
  2. # Export images 700x400
  3. # Libraries
  4. library(ggplot2)
  5. library(dplyr)
  6. library(RColorBrewer)
  7. library(scales)
  8. # Load data from Rdata file
  9. load("crashdata.Rdata")
  10. # Alternative way to load data
  11. #dat <- read.csv("finaldata_201809.csv")
  12. # Relevel crash severity
  13. dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = c("N", "M", "S", "F"))
  14. str(dat)
  15. yeartab <- table(dat$CRASH_YEAR)
  16. mpy <- mean(yeartab)
  17. # Obviously, data missing from 2018
  18. ggplot(dat) + geom_bar(aes(x=CRASH_YEAR), fill=NA, col="black") +
  19. geom_hline(yintercept = mpy, linetype="dashed") + theme_classic() +
  20. labs(x = "Year", y = "Number of crashes")
  21. table(dat$CRASH_FIN_YEAR)
  22. # Check that severity is given as F if and only if nonzero number of fatalities
  23. table(dat$CRASH_SEV, dat$FATAL_COUNT)
  24. # Look at number of vehicles involved
  25. levels(dat$MULTI_VEH)
  26. table(dat$MULTI_VEH, dat$CRASH_SEV)
  27. # Open road vs vehicles
  28. vehtab <- table(dat$MULTI_VEH, dat$URBAN, dat$CRASH_SEV == "F")
  29. vehtab
  30. # Graph crash severity
  31. ggplot(dat) + geom_bar(aes(x=CRASH_SEV), fill=NA, col="black") +
  32. theme_classic() + labs(x = "Crash severity", y = "Number of crashes") +
  33. scale_y_continuous(labels = scales::comma)
  34. # Not all openroad crashes are on state highways
  35. table(dat$URBAN, dat$CRASH_SH_DESC)
  36. # Motorcycle crash severity
  37. table(dat$CRASH_SEV, dat$MOTOR_CYCLE > 0)
  38. # Just look at fatal crashes on the open road on state highways
  39. rurhwy <- dat %>% filter(URBAN == "Openroad", CRASH_SH_DESC == "Yes")
  40. frurhwy <- rurhwy %>% filter(CRASH_SEV == "F")
  41. str(frurhwy)
  42. knitr::kable(table(rurhwy$MULTI_VEH, rurhwy$CRASH_SEV))
  43. ggplot(rurhwy) +
  44. geom_bar(aes(fill = MULTI_VEH != "Single vehicle", x = CRASH_SEV), position="dodge") +
  45. scale_fill_brewer("Vehicles involved", type="qual", palette = "Dark2",
  46. labels = c("Single vehicle", "All other categories")) +
  47. scale_x_discrete("Severity", labels = c("Not", "Moderate", "Severe", "Fatal")) +
  48. theme_classic() + theme(legend.position = "right")
  49. rurtype <- as.data.frame.matrix(table(rurhwy$CRASH_SEV, rurhwy$MULTI_VEH))
  50. rurtypefrac <- rurtype / rowSums(rurtype)
  51. rurtypefrac["Single vehicle"]
  52. colSums(rurtype)/sum(rurtype)