|
@@ -0,0 +1,40 @@
|
|
1
|
+# Scratch file
|
|
2
|
+library(janitor)
|
|
3
|
+library(dplyr)
|
|
4
|
+library(readr)
|
|
5
|
# Read data/AllDHBsGeocoded.csv, normalise its column names to snake_case,
# drop facility_opening_date, and replace NA in dhb_name and
# health_facility_code with the literal string "Missing".
Geo2017 <- read_csv("data/AllDHBsGeocoded.csv") %>%
  clean_names(case = "snake") %>%
  select(-facility_opening_date) %>%
  mutate(
    # Same fill rule for both columns, so it is written once.
    across(
      c(dhb_name, health_facility_code),
      ~ ifelse(is.na(.x), "Missing", .x)
    )
  )
|
|
10
|
+
|
|
11
|
+library(readxl)
|
|
12
|
# Read data/facilities20190902.xlsx, normalise its column names to snake_case,
# keep only rows whose facility_type_name is "GP Practice", drop the
# opening/closing date columns, and replace NA in dhb_name and
# health_facility_code with the literal string "Missing".
newFacil <- read_excel("data/facilities20190902.xlsx") %>%
  clean_names(case = "snake") %>%
  filter(facility_type_name == "GP Practice") %>%
  select(-facility_closing_date, -facility_opening_date) %>%
  mutate(
    # Same fill rule for both columns, so it is written once.
    across(
      c(dhb_name, health_facility_code),
      ~ ifelse(is.na(.x), "Missing", .x)
    )
  )
|
|
19
|
+
|
|
20
|
# Sanity checks comparing newFacil and Geo2017; results are printed when the
# script is run interactively.

# Every distinct dhb_name that occurs in either table.
DHBs <- union(newFacil$dhb_name, Geo2017$dhb_name)
# For each distinct dhb_name in newFacil: does it also occur in Geo2017?
unique(newFacil$dhb_name) %in% Geo2017$dhb_name

# dhb_name values absent from newFacil, then those absent from Geo2017.
setdiff(DHBs, newFacil$dhb_name)
setdiff(DHBs, Geo2017$dhb_name)

# Row counts of each table.
nrow(newFacil)
nrow(Geo2017)

# Number of NA entries per column in each table.
newFacil %>% is.na() %>% colSums()
Geo2017 %>% is.na() %>% colSums()
|
|
31
|
+
|
|
32
|
# Attach the Geo2017 columns to every newFacil row. A left join keeps all rows
# of newFacil; rows without a match on all seven keys get NA in the columns
# that come only from Geo2017.
gp_prac <- newFacil %>%
  left_join(
    Geo2017,
    by = c(
      "name", "health_facility_code", "hpi_facility_id", "address",
      "dhb_name", "fac_type", "facility_type_name"
    )
  )
# Number of NA entries per column after the join.
gp_prac %>% is.na() %>% colSums()
|
|
37
|
+
|
|
38
|
# Partition the joined practices by whether calculated_address is present.
# Columns are referenced by bare name so that filter() evaluates them inside
# the data it was given, rather than reaching back to the global gp_prac
# object via `$`.

# Rows that have a calculated_address.
gp_same <- gp_prac %>%
  filter(!is.na(calculated_address))

# Rows without a calculated_address; lon, lat and calculated_address are
# dropped from this subset.
gp_new <- gp_prac %>%
  filter(is.na(calculated_address)) %>%
  select(-lon, -lat, -calculated_address)
|