2 Commits

Author SHA1 Message Date
  Petra Lamborn 6d418ecf3d Exploratory analysis 4 years ago
  Petra Lamborn f232cb2d59 Add metadataset 4 years ago
2 changed files with 134 additions and 1 deletions
  1. 87
    0
      disaggregated-crash-data-metadata.csv
  2. 47
    1
      expl.R

+ 87
- 0
disaggregated-crash-data-metadata.csv View File

@@ -0,0 +1,87 @@
1
+Variable Name,Description,,,
2
+crash_year,"The year in which a crash occurred, if known. ",,,
3
+crash_fin_year,"The financial (fin) year in which a crash occurred, if known.",,,
4
+crash_sev,"The severity of a crash. Possible values are 'F' (fatal), 'S' (serious), 'M' (minor), 'N' (non-injury). This is determined by the worst injury sustained in the crash at time of entry.",,,
5
+fatal_count,A count of the number of fatal casualties associated with this crash.,,,
6
+seriousinj_count,A count of the number of serious injuries (inj) associated with this crash.,,,
7
+minorinj_count,A count of the number of minor injuries (inj) associated with this crash.,,,
8
+multi_veh,"A variable derived from the number of vehicles which are given roles in the crash. The variable has the following possible values; ‘single vehicle’, ‘multi-vehicle’ , ‘cyclist and vehicle(s)’, ‘pedestrian and vehicle(s)’ , ‘cyclist only’, ‘cyclist(s) and pedestrian(s)’ , ‘vehicle(s) and other’, ‘others, no vehicles’ and ‘other’. ‘Vehicle’ means non-parked vehicle. Parked vehicles are treated as objects in a crash.",,,
9
+holiday,"Indicates where a crash occurred during a 'Christmas/New Year', 'Easter', 'Queens Birthday' or 'Labour Weekend' holiday period, otherwise 'None'. ",,,
10
+lg_region_desc,Identifies the local government (LG) region. The boundaries match territorial local authority (TLA) boundaries in most places. A blank cell occurs where the crash is said to have occurred in a particular TLA and the LG boundaries do not match.,,,
11
+tla_id,The unique identifier for a territorial local authority (TLA). Each crash is assigned a TLA based on where the crash occurred.,,,
12
+tla_name,The name of the territorial local authority (TLA) the crash has been attributed.,,,
13
+au_id,The unique identifier of an area unit.,,,
14
+mb_id,The unique identifier of a meshblock.,,,
15
+easting,"The easting coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m. Please note, in some instances crashes are not able to be assigned to GPS co-ordinates. These crashes have been assigned eastings and northings of ‘0,0’ in this dataset. There are two main reasons that a GPS coordinate cannot be allocated to a crash. Firstly, that the crash has been reported but the location was unknown. Secondly in a small number of instances, a crash may have occurred on a road which is not yet captured on the CAS spatial layer.",,,
16
+northing,"The northing coordinate of an object (usually a crash) expressed in NZMG referred to the WGS84 datum to a precision of 1m. Please note, in some instances crashes are not able to be assigned to GPS co-ordinates. These crashes have been assigned eastings and northings of ‘0,0’ in this dataset. There are two main reasons that a GPS coordinate cannot be allocated to a crash. Firstly, that the crash has been reported but the location was unknown. Secondly in a small number of instances, a crash may have occurred on a road which is not yet captured on the CAS spatial layer.",,,
17
+crash_locn1,"Part 1 of the 'crash location' (crash_locn). May be a road name, route position (RP), landmark, or other, e.g. 'Ninety Mile Beach'. Used for location descriptions in reports etc.",,,
18
+crash_locn2,"Part 2 of the 'crash location' (crash_locn). May be a side road name, landmark etc. Used for location descriptions in reports etc.",,,
19
+outdtd_locn_desc,"Indicates if the location  for this crash is an 'Outdated Location'(outdtd_locn) or 'Current location'. A crash is said to have an 'Outdated location' where  the road might have moved, or does not exist anymore. ",,,
20
+crash_rp_rs,The 'reference station' (RS) for the 'route position' (RP) of a crash.,,,
21
+intersection,"Indicate if a crash happened at an 'Intersection', 'At Landmark' or 'Unknown'.",,,
22
+junction_type,"The type of junction the crash happened at. Possible road junctions include ‘Driveway’, ‘Roundabout’, ‘X Type Junction’, 'T Type Junction', 'Y Type Junction', or 'Multi Road Join'. The junction type may also be unknown. Note crashes that did not occur at a junction are also given a value of unknown. ",,,
23
+cr_rd_side_rd,Indicates whether the principal vehicle in a crash was on the crash road (cr_rd) [1] or side road (sd_rd) [2] at the time of the crash. Note that 'on side road' (2)  can only happen if the crash occurred at an intersection.,,,
24
+crash_dirn_desc,"The direction (dirn) of the crash from the reference point. Values possible are 'North', 'East', 'South' or 'West'.",,,
25
+crash_dist,The distance (dist) of the crash from the reference point for the crash. The reference point is often the intersection of 'crash road' and 'side road' (refer to 'cr_rd_side_rd' variable).,,,
26
+crash_rp_dirn_desc,"Indicates the direction of travel (where known) on a State Highway (SH) with respect to the highway origin. Possible values include 'Increasing' where the crash occurred in increasing distance from SH origin, 'Decreasing' where the crash occurred in decreasing distance to the SH origin, or blank. ",,,
27
+dirn_role1_desc,"The direction (dirn) of the principal vehicle involved in the crash. Possible values are North, South, East or West.",,,
28
+crash_rp_disp,The displacement (disp) of the crash from a reference station (RS). Part of the crash route position (RP).,,,
29
+crash_sh_desc,"Indicates where a crash is reported to have occurred on a State Highway (SH). Possible values include 'Yes' where the crash occurred on a SH, otherwise 'No'.",,,
30
+crash_rp_sh,The State Highway (SH) on which a crash occurred. This is part of a 'route position' (RP) for the crash. Possible values can be any valid natural SH designation reference (e.g. '1N' is SH1 in the North Island.).,,, 
31
+crash_rp_news_desc,"Where the crash occurred on a median-divided State Highway (SH), this flag indicates which side of the median the crash happened. Values 'Northbound', 'Southbound', 'Eastbound' or 'Westbound'.",,,
32
+intsn_midblock,"A derived variable to indicate if a crash occurred at an intersection (intsn) or not. The 'intsn_midblock' variable is calculated using the 'intersection' and 'junction_type' variables. Values are  'Intersection' (where intersection variable = 'Intersection' or {'Intersection' = 'At Landmark' and junction_type is not in ('Unknown' or 'Driveway')} OR {Intersection = 'Unknown' and crash_dist <= 10}), otherwise 'Midblock' (for crashes not meeting the criteria for 'Intersection').",,, 
33
+flat_hill,Whether the road is flat or sloped. Possible values include 'Flat' or 'Hill'.,,, 
34
+road_character,"The general nature of the road. Possible values include 'Bridge', 'Motorway Ramp', 'Railway Crossing' or  ' Unknown'.",,, 
35
+road_curvature,"The curvature of the road. Possible values include 'Straight Road', 'Easy Curve', 'Moderate Curve' or 'Severe curve'.",,, 
36
+road_lane,"The lane configuration of the road. Possible values : '1' (one way), '2' (two way), 'M' (for where a median exists), 'O' (for off-road lane configurations), ' ' ( for unknown or invalid configurations).",,, 
37
+road_markings,"The road markings at the crash site. Possible values: 'Ped Crossing' (for pedestrian crossings), 'Raised Island', 'Painted Island', 'No Passing Lanes', 'Centre Line', 'No Marks' or ' Unknown'.",,,
38
+road_surface,The road surface description applying at the crash site. Possible values: 'Sealed' or 'Unsealed'.,,,
39
+road_wet,"The road wetness at the time and place of the crash. Possible values: 'Wet', 'Dry', 'Ice/Snow' or 'Unknown'",,,
40
+num_lanes,The number(num) of lanes on the crash road.,,,
41
+traffic_ctrl,"The traffic control (ctrl) signals at the crash site. Possible values are 'Traffic Signals', 'Stop Sign', 'Give Way Sign', 'Pointsman', 'School Patrol', 'Nil' or ' N/A'.",,,
42
+spd_lim,"The speed (spd) limit (lim)  in force at the crash site at the time of the crash. May be a number, or 'LSZ' for a limited speed zone.",,,
43
+adv_spd,The advisory (adv) speed (spd) at the crash site at the time of the crash.,,,
44
+tmp_spd_lim,The temporary (temp) speed (spd) limit (lim) at the crash site if one exists (e.g. for road works).,,,
45
+urban,"A derived variable using the 'spd_lim' variable. Possible values are 'Urban' (urban, spd_lim < 80) or 'Open Road' (open road, spd_lim >=80 or 'LSZ').",,,
46
+dark_light,"A variable derived from the 'light' variable. Values 'Dark' (if  'light' = 'Dark' or 'Twilight'), 'Light' ( 'light' = 'Bright', 'Overcast') or ' Unknown'  (light = ' ').",,,
47
+light,"The light at the time and place of the crash. Possible values: 'Bright Sun', 'Overcast', 'Twilight', 'Dark' or ' Unknown'.",,,
48
+street_light,"The street lighting at the time of the crash. Possible values 'On', 'Off', 'None' or ' Unknown'.",,,
49
+weather_a,"Indicates weather at the crash time/place. See weather_b. Values that are possible are 'Fine', 'Mist', 'Light Rain', 'Heavy Rain', 'Snow', 'Unknown'.",,,
50
+weather_b,"The weather at the crash time/place. See weather_a. Values 'Frost', 'Strong Wind' or 'Unknown'.",,,
51
+Animals,"Derived variable to indicate how many times an 'Animal(s)'  was struck in the crash. This is used where the animals, being driven or led, were under control.",,,
52
+Bridge,"Derived variable to indicate how many times a bridge, tunnel, the abutments, handrails were struck in the crash.",,,
53
+Cliff_Bank,Derived variable to indicate how many times a 'cliff' or 'bank' was struck in the crash. This includes retaining walls,,,
54
+Debris,"Derived variable to indicate how many times debris, boulders or items dropped or thrown from a vehicle(s) were struck in the crash ",,,
55
+Ditch,Derived variable to indicate how many times a 'ditch' or 'waterable drainage channel' was struck in a crash.,,,
56
+Fence,"Derived variable to indicate how many times a 'fence' was struck in the crash. This includes letterbox(es), hoardings, private roadside furniture, hedges, sight rails, etc.",,,
57
+Guard_Rail,"Derived variable to indicate how many times a guard or guard rail was struck in the crash. This includes 'New Jersey' barriers, 'ARMCO', sand filled barriers, wire catch fences, etc.",,,
58
+House_Or_Bldg,"Derived variable to indicate how many times houses, garages, sheds or other buildings (Bldg) were struck in the crash",,,
59
+Kerb,"Derived variable to indicate how many times a kerb was struck in the crash, that contributed directly to the crash. ",,,
60
+Obj_Thrown_Dropped,Derived variable to indicate how many times objects were thrown at or dropped on vehicles in the crash. ,,,
61
+Other,"Derived variable to indicate how many times an object was struck in a crash and the object struck was not pre-defined. This variable includes stockpiled materials, rubbish bins, fallen poles, fallen trees, etc.",,,
62
+Over_Bank,Derived variable to indicate how many times an embankment was struck or driven over during a crash. This variable includes other vertical drops driven over during a crash.,,,
63
+Parked_Vehicle,Derived variable to indicate how many times a parked or unattended vehicle was struck in the crash. This variable can include trailers.,,,
64
+Phone_Box_Etc,"Derived variable to indicate how many times a telephone kiosk, traffic signal controllers, bus shelters or other public furniture was struck in the crash",,,
65
+Post_Or_Pole,"Derived variable to indicate how many times a post or pole was struck in the crash. This includes light, power, phone, utility poles and objects practically forming part of a pole (i.e. 'Transformer Guy' wires)",,,
66
+Roadworks,"Derived variable to indicate how many times an object associated with 'roadworks' (including signs, cones, drums, barriers, but not roadwork vehicles) was struck during the crash",,,
67
+Slip_Or_Flood,"Derived variable to indicate how many times landslips, washouts or floods (excluding rivers) were objects struck in the crash",,,
68
+Stray_Animal,"Derived variable to indicate how many times a stray animal(s) was struck in the crash. This variable includes wild animals such as pigs, goats, deer, straying farm animals, house pets and birds.",,,
69
+Traffic_Island,"Derived variable to indicate how many times a traffic island or median (excluding barriers) was struck in the crash.",,,
70
+Traffic_Sign,"Derived variable to indicate how many times 'traffic signage' (including traffic signals, their poles, bollards or roadside delineators) was struck in the crash.",,,
71
+Train,"Derived variable to indicate how many times a train, rolling stock or jiggers was struck in the crash, whether stationary or moving",,,
72
+Trees,Derived variable to indicate how many times trees or other growing items were struck during the crash. ,,,
73
+Vehicle,"Derived variable to indicate how many times a stationary attended vehicle was struck in the crash. This includes broken down vehicles, workmen's vehicles, taxis, buses.",,,
74
+Water_River,"Derived variable to indicate how many times a body of water (including rivers, streams, lakes, the sea, tidal flats, canals, watercourses or swamps) was struck in the crash.",,,
75
+BICYCLE,Derived variable to indicate how many bicycles were involved in the crash.,,,
76
+BUS,Derived variable to indicate how many buses were involved in the crash (excluding school buses which are counted in the SCHOOL_BUS field).,,,
77
+CAR_STN_WAGON,Derived variable to indicate how many cars or station wagons were involved in the crash.,,,
78
+MOPED,Derived variable to indicate how many mopeds were involved in the crash.,,,
79
+MOTOR_CYCLE,Derived variable to indicate how many motorcycles were involved in the crash.,,,
80
+OTHER_VEHICLE_TYPE,Derived variable to indicate how many other vehicles (not included in any other category) were involved in the crash.,,,
81
+SCHOOL_BUS,Derived variable to indicate how many school buses were involved in the crash.,,,
82
+SUV,Derived variable to indicate how many SUVs were involved in the crash.,,,
83
+TAXI,Derived variable to indicate how many taxis were involved in the crash.,,,
84
+TRUCK,Derived variable to indicate how many trucks were involved in the crash.,,,
85
+UNKNOWN_VEHICLE_TYPE,Derived variable to indicate how many vehicles were involved in the crash (where the vehicle type is unknown).,,,
86
+VAN_OR_UTILITY,Derived variable to indicate how many vans or utes were involved in the crash.,,,
87
+PEDESTRIAN,"Derived variable to indicate how many pedestrians were involved in the crash. This includes pedestrians on foot, skateboards, scooters and wheelchairs.",,,

+ 47
- 1
expl.R View File

@@ -2,6 +2,8 @@
2 2
 
3 3
 # Libraries
4 4
 library(ggplot2)
5
+library(dplyr)
6
+library(RColorBrewer)
5 7
 
6 8
 # Load data from Rdata file
7 9
 load("crashdata.Rdata")
@@ -9,8 +11,52 @@ load("crashdata.Rdata")
9 11
 # Alternative way to load data
10 12
 #dat <- read.csv("finaldata_201809.csv")
11 13
 
14
+# Relevel crash severity
15
+dat$CRASH_SEV <- factor(dat$CRASH_SEV, levels = c("N", "M", "S", "F"))
16
+
12 17
 str(dat)
13 18
 
14 19
 yeartab <- table(dat$CRASH_YEAR)
20
+mpy <- mean(yeartab)
21
+
22
+# Obviously, data missing from 2018
23
+ggplot(dat) + geom_bar(aes(x=CRASH_YEAR), fill=NA, col="black") +
24
+  geom_hline(yintercept = mpy, linetype="dashed") + theme_classic() +
25
+  labs(x = "Year", y = "Number of crashes")
26
+table(dat$CRASH_FIN_YEAR)
27
+
28
+# Check that severity is given as F if and only if nonzero number of fatalities
29
+table(dat$CRASH_SEV, dat$FATAL_COUNT)
30
+
31
+# Look at number of vehicles involved
32
+levels(dat$MULTI_VEH)
33
+table(dat$MULTI_VEH, dat$CRASH_SEV)
34
+
35
+# Open road vs vehicles
36
+vehtab <- table(dat$MULTI_VEH, dat$URBAN, dat$CRASH_SEV == "F")
37
+vehtab
38
+
39
+# Graph crash severity
40
+ggplot(dat) + geom_bar(aes(x=CRASH_SEV), fill=NA, col="black") +
41
+  theme_classic() + labs(x = "Crash severity", y = "Number of crashes")
42
+
43
+# Not all openroad crashes are on state highways
44
+table(dat$URBAN, dat$CRASH_SH_DESC)
45
+
46
+# Motorcycle crash severity
47
+table(dat$CRASH_SEV, dat$MOTOR_CYCLE > 0)
48
+
49
+# Just look at fatal crashes on the open road on state highways
50
+rurhwy <- dat %>% filter(URBAN == "Openroad", CRASH_SH_DESC == "Yes")
51
+frurhwy <- rurhwy %>% filter(CRASH_SEV == "F")
52
+
53
+str(frurhwy)
54
+
55
+knitr::kable(table(rurhwy$MULTI_VEH, rurhwy$CRASH_SEV))
15 56
 
16
-ggplot(dat) + geom_bar(aes(x=CRASH_YEAR))
57
+ggplot(rurhwy) + 
58
+  geom_bar(aes(fill = MULTI_VEH != "Single vehicle", x = CRASH_SEV), position="dodge") +
59
+  scale_fill_brewer("Vehicles involved", type="qual", palette = "Dark2",
60
+                      labels = c("Single vehicle", "All other categories")) +
61
+  scale_x_discrete("Severity", labels = c("Not", "Moderate", "Severe", "Fatal")) +
62
+  theme_classic() + theme(legend.position = "right")