Geocoding of publicly available GP practice locations in New Zealand

scratch.R 1.7KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  # Scratch file
  library(janitor)
  library(dplyr)
  library(readr)

  # Previously geocoded facilities: read the CSV, convert column names to
  # snake_case, drop facility_opening_date, and replace NA values in dhb_name
  # and health_facility_code with the literal string "Missing".
  Geo2017 <- read_csv("data/AllDHBsGeocoded.csv") %>%
    clean_names(case = "snake") %>%
    select(-facility_opening_date) %>%
    mutate(
      dhb_name = ifelse(is.na(dhb_name), "Missing", dhb_name),
      health_facility_code = ifelse(
        is.na(health_facility_code),
        "Missing",
        health_facility_code
      )
    )
  library(readxl)

  # Facility list from the Excel workbook: convert column names to snake_case,
  # keep only rows whose facility_type_name is "GP Practice", drop the
  # opening/closing date columns, and replace NA values in dhb_name and
  # health_facility_code with the literal string "Missing".
  newFacil <- read_excel("data/facilities20190902.xlsx") %>%
    clean_names(case = "snake") %>%
    filter(facility_type_name == "GP Practice") %>%
    select(-facility_closing_date, -facility_opening_date) %>%
    mutate(
      dhb_name = ifelse(is.na(dhb_name), "Missing", dhb_name),
      health_facility_code = ifelse(
        is.na(health_facility_code),
        "Missing",
        health_facility_code
      )
    )
  # Exploratory checks comparing the two facility tables.

  # Every distinct DHB name seen in either table (newFacil names first).
  DHBs <- union(newFacil$dhb_name, Geo2017$dhb_name)

  # For each distinct DHB name in newFacil: does it also occur in Geo2017?
  unique(newFacil$dhb_name) %in% Geo2017$dhb_name

  # DHB names absent from newFacil, then DHB names absent from Geo2017.
  setdiff(DHBs, newFacil$dhb_name)
  setdiff(DHBs, Geo2017$dhb_name)

  # Row counts of each table.
  nrow(newFacil)
  nrow(Geo2017)

  # Number of NA values per column in each table.
  newFacil %>% is.na() %>% colSums()
  Geo2017 %>% is.na() %>% colSums()
  # Attach the Geo2017 columns to every newFacil row, matching on the seven
  # shared identifying columns listed in `by`.
  gp_prac <- newFacil %>%
    left_join(
      Geo2017,
      by = c(
        "name", "health_facility_code", "hpi_facility_id",
        "address", "dhb_name", "fac_type", "facility_type_name"
      )
    )

  # NA counts per column after the join.
  gp_prac %>% is.na() %>% colSums()

  # Rows that have a calculated_address after the join.
  gp_same <- gp_prac %>%
    filter(!is.na(calculated_address))

  # Rows with no calculated_address; lon, lat and calculated_address are
  # dropped from these rows (presumably they are all NA here and the rows
  # still need geocoding -- confirm).
  gp_new <- gp_prac %>%
    filter(is.na(calculated_address)) %>%
    select(-c(lon, lat, calculated_address))
  # Frequency of each facility_type_name value in the two tables.
  table(newFacil[["facility_type_name"]])
  table(Geo2017[["facility_type_name"]])