Browse Source

Clean, find facilities to code

Petra Lamborn 4 years ago
parent
commit
e1dc31ae80
1 changed files with 40 additions and 0 deletions
  1. 40
    0
      scratch.R

+ 40
- 0
scratch.R View File

@@ -0,0 +1,40 @@
1
+# Scratch file: load and clean the geocoded and the newer facility lists
2
+library(janitor)
3
+library(dplyr)
4
+library(readr)
5
+library(readxl)
6
+# Recode NA as the literal string "Missing"; non-NA values pass through
7
+na_to_missing <- function(x) ifelse(is.na(x), "Missing", x)
8
+Geo2017 <- read_csv("data/AllDHBsGeocoded.csv") %>%
9
+    clean_names(case = "snake") %>%
10
+    select(-facility_opening_date) %>%
11
+    mutate(dhb_name = na_to_missing(dhb_name),
12
+           health_facility_code = na_to_missing(health_facility_code))
13
+# From the facilities spreadsheet keep only rows typed "GP Practice"
14
+newFacil <- read_excel("data/facilities20190902.xlsx") %>%
15
+    clean_names(case = "snake") %>%
16
+    filter(facility_type_name == "GP Practice") %>%
17
+    select(-facility_closing_date, -facility_opening_date) %>%
18
+    mutate(dhb_name = na_to_missing(dhb_name),
19
+           health_facility_code = na_to_missing(health_facility_code))
20
+# Every DHB name that occurs in either dataset
21
+DHBs <- unique(c(newFacil$dhb_name, Geo2017$dhb_name))
22
+# For each distinct newFacil DHB name: does it also occur in Geo2017?
23
+unique(newFacil$dhb_name) %in% unique(Geo2017$dhb_name)
24
+# DHB names absent from newFacil, then those absent from Geo2017
25
+setdiff(DHBs, newFacil$dhb_name)
26
+setdiff(DHBs, Geo2017$dhb_name)
27
+# Row counts, then per-column NA counts, for both datasets
28
+nrow(newFacil)
29
+nrow(Geo2017)
30
+colSums(is.na(newFacil))
31
+colSums(is.na(Geo2017))
32
+# Attach the Geo2017 columns to every newFacil row that agrees on all keys
33
+gp_prac <- newFacil %>%
34
+    left_join(Geo2017, by = c("name", "health_facility_code", "hpi_facility_id",
35
+        "address", "dhb_name", "fac_type", "facility_type_name"))
36
+colSums(is.na(gp_prac))
37
+# Split on calculated_address: present -> gp_same, NA -> gp_new
38
+gp_same <- filter(gp_prac, !is.na(calculated_address))
39
+gp_new <- filter(gp_prac, is.na(calculated_address)) %>%
40
+    select(-lon, -lat, -calculated_address)