Geocoding of publicly available GP practice locations in New Zealand

scratch.R 1.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  # Scratch file
  #
  # Compares the facilities read from data/AllDHBsGeocoded.csv with the GP
  # practices read from data/facilities20190902.xlsx, joins the two tables,
  # and geocodes the addresses of the practices that have no
  # calculated_address after the join.

  library(janitor)
  library(dplyr)
  #library(ggmap)
  library(photon)
  library(readr)
  library(readxl)
  source("osm_api.R")

  # Replace NA entries of a column with the literal string "Missing".
  # ifelse() is kept (rather than coalesce()) so that a non-character column
  # is coerced exactly as before instead of raising a type error.
  na_to_missing <- function(x) {
    ifelse(is.na(x), "Missing", x)
  }

  # Load the inputs ----

  Geo2017 <- read_csv("data/AllDHBsGeocoded.csv") %>%
    clean_names(case = "snake") %>%
    select(-facility_opening_date) %>%
    mutate(
      dhb_name = na_to_missing(dhb_name),
      health_facility_code = na_to_missing(health_facility_code)
    )

  # Only rows whose facility type is "GP Practice" are kept from the extract.
  newFacil <- read_excel("data/facilities20190902.xlsx") %>%
    clean_names(case = "snake") %>%
    filter(facility_type_name == "GP Practice") %>%
    select(-facility_closing_date, -facility_opening_date) %>%
    mutate(
      dhb_name = na_to_missing(dhb_name),
      health_facility_code = na_to_missing(health_facility_code)
    )

  # Diagnostics: DHB names, row counts and missingness ----
  # These bare expressions only print when the script is run interactively
  # (or sourced with echo = TRUE).

  DHBs <- unique(c(newFacil$dhb_name, Geo2017$dhb_name))
  unique(newFacil$dhb_name) %in% unique(Geo2017$dhb_name)
  DHBs[!(DHBs %in% newFacil$dhb_name)]  # DHB names absent from newFacil
  DHBs[!(DHBs %in% Geo2017$dhb_name)]   # DHB names absent from Geo2017

  nrow(newFacil)
  nrow(Geo2017)
  colSums(is.na(newFacil))
  colSums(is.na(Geo2017))

  # Join the two tables ----

  join_keys <- c(
    "name", "health_facility_code", "hpi_facility_id",
    "address", "dhb_name",
    "fac_type", "facility_type_name"
  )
  gp_prac <- left_join(newFacil, Geo2017, by = join_keys)
  colSums(is.na(gp_prac))

  # Split on whether the joined row carries a calculated_address.
  # NOTE(review): lon, lat and calculated_address presumably come from
  # Geo2017, so an NA here would mean "no match in Geo2017" - confirm.
  # Bare column names are used instead of gp_prac$... because filter() is a
  # data-masking verb.
  gp_same <- filter(gp_prac, !is.na(calculated_address))
  gp_new <- filter(gp_prac, is.na(calculated_address)) %>%
    select(-lon, -lat, -calculated_address)

  table(newFacil$facility_type_name)
  table(Geo2017$facility_type_name)

  # Geocode the addresses in gp_new ----

  # newcoded <- osm_geocode("21 Ruakura Road, Hamilton East, Hamilton 3216")
  # NOTE(review): geocode() is presumably photon::geocode (ggmap is commented
  # out above) unless osm_api.R defines its own geocode - confirm.
  newcoded <- geocode(gp_new$address, limit = 1)