#! /usr/bin/env Rscript

# Original 2017 geocoding script
#
# Geocodes the addresses of GP practices listed in the facilities extract,
# one DHB at a time, then combines the per-DHB results, retries the addresses
# that came back without coordinates, and finally splits the combined table
# into usable ("found") and unusable ("notfound") geocodes.
#
# Files read and written (all under ../Data):
#   facilities20170901.csv  input: all facilities
#   <DHB name>.csv          output: geocoded GP practices for one DHB
#   AllDHBsGeocoded.csv     output: every per-DHB file stacked together
#   FoundGPs.csv            output: rows that have a longitude
#   NotFoundGPs.csv         output: rows without one (geocode columns dropped)

library(dplyr)
library(ggmap)

data_dir <- file.path("..", "Data")

allmed <- read.csv(
  file.path(data_dir, "facilities20170901.csv"),
  stringsAsFactors = FALSE
)

gppractice <- filter(allmed, Facility.Type.Name == "GP Practice")
# The extract uses the literal string "NULL" for a missing DHB name.
gppractice[gppractice$DHB.Name == "NULL", "DHB.Name"] <- "Missing"

DHBs <- unique(gppractice$DHB.Name)
# Index (into DHBs) of the DHB(s) to geocode in this run.
# NOTE(review): presumably edited by hand between runs -- confirm.
DHBsToGet <- 13

# Notes:
# * Clean up missing values
# * Seems to fail for Taranaki

# Geocode the selected DHB(s) and write one CSV per DHB.
for (dhb in DHBs[DHBsToGet]) {
  print(dhb)
  gpdhb <- filter(gppractice, DHB.Name == dhb)
  print(nrow(gpdhb))
  # `output` must be passed with `=`: writing `output <- "latlona"` inside the
  # call assigns a global variable and only works by positional matching.
  geodhb <- geocode(gpdhb$Address, output = "latlona") %>%
    rename(Calculated.Address = address)
  gpdhb <- cbind(gpdhb, geodhb)
  write.csv(
    gpdhb,
    file = file.path(data_dir, paste0(dhb, ".csv")),
    row.names = FALSE
  )
}

# gphv <- filter(gppractice, DHB.Name == "Hutt Valley District Health Board")
#
# geocodes <- geocode(gphv$Address, output = "latlona")
# geocodes <- rename(geocodes, "Calculated.Address" = address)
# gphv <- cbind(gphv, geocodes)
#
# write.csv(gphv, file="../Data/HuttDHB.csv")

# Stack every per-DHB file. Reading all files first and binding once avoids
# re-copying the accumulated data frame on every iteration.
alldhbs <- do.call(
  rbind,
  lapply(DHBs, function(dhb) {
    read.csv(
      file.path(data_dir, paste0(dhb, ".csv")),
      stringsAsFactors = FALSE
    )
  })
)

write.csv(
  alldhbs,
  file = file.path(data_dir, "AllDHBsGeocoded.csv"),
  row.names = FALSE
)

# Read in allDHB data
alldhbs <- read.csv(
  file.path(data_dir, "AllDHBsGeocoded.csv"),
  stringsAsFactors = FALSE
)

# Calc found and notfound
found <- filter(alldhbs, !is.na(lon))
notfound <- filter(alldhbs, is.na(lon))

# Write found and not found
write.csv(found, file = file.path(data_dir, "FoundGPs.csv"), row.names = FALSE)

# Drop the empty geocode columns so the retry below can add fresh ones.
notfound$lat <- NULL
notfound$lon <- NULL
notfound$Calculated.Address <- NULL
write.csv(
  notfound,
  file = file.path(data_dir, "NotFoundGPs.csv"),
  row.names = FALSE
)

# Read found and not found
found <- read.csv(
  file.path(data_dir, "FoundGPs.csv"),
  stringsAsFactors = FALSE
)
notfound <- read.csv(
  file.path(data_dir, "NotFoundGPs.csv"),
  stringsAsFactors = FALSE
)

#notfound$Address <- paste0(notfound$Name, ", ", notfound$Address)

if (nrow(notfound) > 0) {
  # Second attempt at the addresses that returned no coordinates.
  nfgeo <- geocode(notfound$Address, output = "latlona")
  nfgeo <- rename(nfgeo, Calculated.Address = address)
  notfound <- cbind(notfound, nfgeo)

  # Join found and not found
  #notfound <- cbind(notfound, lon=NA, lat=NA, Calculated.Address=NA)
  alldhbs <- rbind(found, notfound)
} else {
  # Nothing to retry; `notfound` has no geocode columns here, so binding it
  # to `found` would fail on the column mismatch.
  alldhbs <- found
}

# A geocode counts as bad when its calculated address does not contain
# "new zealand" (rows with a missing address are bad too, since grepl()
# returns FALSE for NA).
# NOTE(review): the match is case-sensitive; this assumes geocode() returns
# lower-cased addresses -- confirm against the ggmap version in use.
bad <- !grepl("new zealand", alldhbs$Calculated.Address)

# Index directly rather than via subset(), which would resolve `bad` against
# a column of that name if one existed.
badcode <- alldhbs[bad, , drop = FALSE]
goodcode <- alldhbs[!bad, , drop = FALSE]

notfound <- badcode
found <- goodcode