Geocoding of publicly available GP practice locations in New Zealand

geocodeGPs.R 2.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  #! /usr/bin/env Rscript
  # Original 2017 geocoding script.
  #
  # Loads the facilities extract, keeps the GP practices, and builds the list of
  # DHB names that the later geocoding steps iterate over.
  library(dplyr)
  library(ggmap)

  # Read text columns as character rather than factor.
  allmed <- read.csv("../Data/facilities20170901.csv", stringsAsFactors = FALSE)
  gppractice <- filter(allmed, Facility.Type.Name == "GP Practice")

  # Rows whose DHB.Name is the literal string "NULL" are relabelled "Missing".
  # `%in%` never yields NA, so an NA in DHB.Name cannot make the subscripted
  # assignment fail (an `==` mask with NA is rejected for data frames).
  gppractice$DHB.Name[gppractice$DHB.Name %in% "NULL"] <- "Missing"

  DHBs <- unique(gppractice$DHB.Name)
  # Position(s) within DHBs to geocode on this run.
  DHBsToGet <- 13
  # Notes:
  # * Clean up missing values
  # * Seems to fail for Taranaki
  #
  # Geocode the practices of each selected DHB and write one CSV per DHB, named
  # after the DHB, into ../Data/.
  for (dhb in DHBs[DHBsToGet]) {
    print(dhb)
    gpdhb <- filter(gppractice, DHB.Name == dhb)
    print(nrow(gpdhb))
    # Name the argument with `=`: `output <- "latlona"` inside the call would
    # assign a global `output` and pass the value by position only.
    geodhb <- geocode(gpdhb$Address, output = "latlona") %>%
      rename("Calculated.Address" = address)
    gpdhb <- cbind(gpdhb, geodhb)
    write.csv(gpdhb, file = paste0("../Data/", dhb, ".csv"), row.names = FALSE)
  }

  # Earlier single-DHB version (Hutt Valley), kept for reference:
  # gphv <- filter(gppractice, DHB.Name == "Hutt Valley District Health Board")
  #
  # geocodes <- geocode(gphv$Address, output = "latlona")
  # geocodes <- rename(geocodes, "Calculated.Address" = address)
  # gphv <- cbind(gphv, geocodes)
  #
  # write.csv(gphv, file="../Data/HuttDHB.csv")
  # Stack the per-DHB geocoding CSVs into one table and save it.
  # Every DHB in DHBs needs its CSV in ../Data/; check up front so a missing
  # file is reported by name instead of failing part-way through the reads.
  dhb_files <- vapply(
    DHBs,
    function(dhb_name) paste0("../Data/", dhb_name, ".csv"),
    character(1),
    USE.NAMES = FALSE
  )
  missing_files <- dhb_files[!file.exists(dhb_files)]
  if (length(missing_files) > 0) {
    stop(
      "Missing per-DHB geocode file(s): ",
      paste(missing_files, collapse = ", "),
      call. = FALSE
    )
  }
  # Read everything first and bind once, rather than growing the data frame
  # with rbind() on every iteration. With no files the result is NULL.
  alldhbs <- do.call(
    rbind,
    lapply(dhb_files, read.csv, stringsAsFactors = FALSE)
  )
  write.csv(alldhbs, file = "../Data/AllDHBsGeocoded.csv", row.names = FALSE)
  # Reload the combined geocoding results from disk.
  alldhbs <- read.csv("../Data/AllDHBsGeocoded.csv", stringsAsFactors = FALSE)

  # Split the rows on whether a longitude is present.
  found <- alldhbs %>% filter(!is.na(lon))
  notfound <- alldhbs %>% filter(is.na(lon))

  # Save both groups; the not-found file is written without the geocoding
  # columns (lat, lon, Calculated.Address).
  write.csv(found, file = "../Data/FoundGPs.csv", row.names = FALSE)
  notfound <- notfound[
    setdiff(names(notfound), c("lat", "lon", "Calculated.Address"))
  ]
  write.csv(notfound, file = "../Data/NotFoundGPs.csv", row.names = FALSE)
  # Reload the found / not-found groups from their CSV files.
  found <- read.csv("../Data/FoundGPs.csv", stringsAsFactors = FALSE)
  notfound <- read.csv("../Data/NotFoundGPs.csv", stringsAsFactors = FALSE)

  # Geocode the not-found addresses again and attach the new columns.
  # Disabled variant that prefixes the practice name to the address:
  # notfound$Address <- paste0(notfound$Name, ", ", notfound$Address)
  nfgeo <- geocode(notfound$Address, output = "latlona") %>%
    rename("Calculated.Address" = address)
  notfound <- cbind(notfound, nfgeo)
  # Join found and not found
  # notfound <- cbind(notfound, lon = NA, lat = NA, Calculated.Address = NA)
  alldhbs <- rbind(found, notfound)

  # A row is "bad" when its geocoded address does not contain "new zealand".
  # grepl() returns FALSE for NA, so rows with no geocoded address are bad too.
  bad <- !grepl("new zealand", alldhbs$Calculated.Address)

  # Index with `[` rather than subset(): subset() evaluates its condition inside
  # the data frame first, so a column named `bad` would shadow this mask.
  badcode <- alldhbs[bad, , drop = FALSE]
  goodcode <- alldhbs[!bad, , drop = FALSE]
  notfound <- badcode
  found <- goodcode