Geocoding of publicly available GP practice locations in New Zealand

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  # Scratch file
  library(janitor)
  library(dplyr)
  library(ggmap)
  library(readr)

  # Load data/AllDHBsGeocoded.csv into Geo2017 (presumably the 2017 geocoded
  # facility list, going by the object name -- confirm against the data source).
  # Steps, in order:
  #   1. convert the column names to snake_case,
  #   2. drop facility_opening_date,
  #   3. replace NA in dhb_name and health_facility_code with the literal
  #      string "Missing", so neither column contains NA afterwards (both
  #      columns are used as join keys further down the script).
  Geo2017 <- read_csv("data/AllDHBsGeocoded.csv") %>%
    clean_names(case = "snake") %>%
    select(-facility_opening_date) %>%
    mutate(
      dhb_name = replace(dhb_name, is.na(dhb_name), "Missing"),
      health_facility_code = replace(
        health_facility_code, is.na(health_facility_code), "Missing"
      )
    )
  library(readxl)

  # Load the facilities workbook data/facilities20190902.xlsx into newFacil.
  # Steps, in order:
  #   1. convert the column names to snake_case,
  #   2. keep only rows whose facility_type_name is exactly "GP Practice",
  #   3. drop both facility date columns (opening and closing),
  #   4. replace NA in dhb_name and health_facility_code with the literal
  #      string "Missing", so neither column contains NA afterwards.
  newFacil <- read_excel("data/facilities20190902.xlsx") %>%
    clean_names(case = "snake") %>%
    filter(facility_type_name == "GP Practice") %>%
    select(-facility_closing_date, -facility_opening_date) %>%
    mutate(
      dhb_name = replace(dhb_name, is.na(dhb_name), "Missing"),
      health_facility_code = replace(
        health_facility_code, is.na(health_facility_code), "Missing"
      )
    )
  # Diagnostics comparing newFacil with Geo2017. Only DHBs is assigned; every
  # other expression is evaluated at top level so its value is printed.

  # Every distinct dhb_name seen in either table.
  DHBs <- unique(c(newFacil$dhb_name, Geo2017$dhb_name))

  # One TRUE/FALSE per distinct dhb_name in newFacil: does it occur in Geo2017?
  unique(newFacil$dhb_name) %in% Geo2017$dhb_name

  # Names that occur only in Geo2017 (absent from newFacil) ...
  setdiff(DHBs, newFacil$dhb_name)
  # ... and names that occur only in newFacil (absent from Geo2017).
  setdiff(DHBs, Geo2017$dhb_name)

  # Row counts of the two tables.
  nrow(newFacil)
  nrow(Geo2017)

  # Number of NA values in each column of the two tables.
  colSums(is.na(newFacil))
  colSums(is.na(Geo2017))
  # Attach the columns of Geo2017 to every row of newFacil. A row matches only
  # when all seven key columns listed in `by` are equal in both tables; rows of
  # newFacil without a match are kept, with NA in the columns that exist only
  # in Geo2017.
  gp_prac <- newFacil %>%
    left_join(
      Geo2017,
      by = c(
        "name", "health_facility_code", "hpi_facility_id", "address",
        "dhb_name", "fac_type", "facility_type_name"
      )
    )

  # Printed check: NA count per column after the join.
  colSums(is.na(gp_prac))

  # Split on calculated_address (presumably supplied by Geo2017 -- confirm).
  # Columns are referenced by bare name inside filter() rather than through
  # `gp_prac$...`, which is the data-masking form dplyr expects.
  #
  # gp_same: rows that already carry a calculated_address.
  gp_same <- gp_prac %>%
    filter(!is.na(calculated_address))

  # gp_new: rows whose calculated_address is NA, whatever the reason (no match
  # in the join, or a matched row whose value was itself NA). The empty
  # lon / lat / calculated_address columns are dropped from this subset.
  gp_new <- gp_prac %>%
    filter(is.na(calculated_address)) %>%
    select(-lon, -lat, -calculated_address)
  # Printed check: frequency of each facility_type_name in the two tables.
  table(newFacil$facility_type_name)
  table(Geo2017$facility_type_name)

  # Geocode the address of every row in gp_new with ggmap::geocode(), which
  # calls an external geocoding service (see the ggmap docs for credential
  # setup). With output = "latlona" the result has an `address` column, which
  # is renamed to calculated_address here.
  # NOTE(review): newcoded is not attached back to gp_new in this part of the
  # script -- confirm that a later step combines them.
  newcoded <- geocode(location = gp_new$address, output = "latlona") %>%
    rename(calculated_address = address)