12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- #! /usr/bin/env Rscript
- # Original 2017 geocoding script
-
- library(dplyr)
- library(ggmap)
-
# Load the facilities extract and keep only the GP practices.
allmed <- read.csv("../Data/facilities20170901.csv", stringsAsFactors = FALSE)
gppractice <- filter(allmed, Facility.Type.Name == "GP Practice")

# Relabel practices without a DHB as "Missing". The extract marks these with
# the literal string "NULL". The mask is built so that it never contains NA:
# an NA in a logical subscript makes data-frame assignment stop with
# "missing values are not allowed in subscripted assignments", so NA names
# are folded into the same "Missing" label instead of aborting the script.
no_dhb <- is.na(gppractice$DHB.Name) | gppractice$DHB.Name == "NULL"
gppractice[no_dhb, "DHB.Name"] <- "Missing"

# Distinct DHB names, in order of first appearance.
DHBs <- unique(gppractice$DHB.Name)

# Position(s) within `DHBs` that the geocoding loop below will process.
DHBsToGet <- 13
-
- # Notes:
- # * Clean up missing values
- # * Seems to fail for Taranaki
-
# Geocode the GP practices of each selected DHB and write one CSV per DHB
# ("../Data/<DHB name>.csv"). Only the DHBs picked out by `DHBsToGet` are
# processed; presumably the index is changed by hand between runs.
for (dhb in DHBs[DHBsToGet]) {
  print(dhb)
  gpdhb <- filter(gppractice, DHB.Name == dhb)
  print(nrow(gpdhb))

  # `output` is passed as a named argument. Writing `output <- "latlona"`
  # inside the call would create a stray global `output` and only reach the
  # right parameter by positional accident.
  geodhb <- geocode(gpdhb$Address, output = "latlona") %>%
    rename("Calculated.Address" = address)

  # Append the geocoder's columns to the practice rows and save them.
  gpdhb <- cbind(gpdhb, geodhb)
  write.csv(gpdhb, file = paste0("../Data/", dhb, ".csv"), row.names = FALSE)
}
-
- # gphv <- filter(gppractice, DHB.Name == "Hutt Valley District Health Board")
- #
- # geocodes <- geocode(gphv$Address, output = "latlona")
- # geocodes <- rename(geocodes, "Calculated.Address" = address)
- # gphv <- cbind(gphv, geocodes)
- #
- # write.csv(gphv, file="../Data/HuttDHB.csv")
-
# Stitch the per-DHB geocoded CSVs back into a single table.
#
# Each name in `DHBs` must already have its "../Data/<DHB name>.csv" on disk;
# read.csv() stops on a missing file. The files are read into a list and bound
# once, rather than growing the data frame with rbind() on every iteration.
# With an empty `DHBs` the result is NULL, exactly as before.
alldhbs <- do.call(
  rbind,
  lapply(DHBs, function(dhb) {
    read.csv(paste0("../Data/", dhb, ".csv"), stringsAsFactors = FALSE)
  })
)

write.csv(alldhbs, file = "../Data/AllDHBsGeocoded.csv", row.names = FALSE)
-
# Reload the combined geocoding results from disk.
alldhbs <- read.csv(
  "../Data/AllDHBsGeocoded.csv",
  stringsAsFactors = FALSE
)

# Partition the rows on whether a longitude is present:
# rows without one go to `notfound`, the rest to `found`.
notfound <- alldhbs %>% filter(is.na(lon))
found <- alldhbs %>% filter(!is.na(lon))
-
# Save the located practices unchanged.
write.csv(found, file = "../Data/FoundGPs.csv", row.names = FALSE)

# Remove the geocoding columns from the unlocated practices before saving,
# leaving only the other columns in NotFoundGPs.csv.
notfound <- notfound[
  setdiff(names(notfound), c("lat", "lon", "Calculated.Address"))
]
write.csv(notfound, file = "../Data/NotFoundGPs.csv", row.names = FALSE)
-
# Reload both partitions from their CSV files.
found <- read.csv("../Data/FoundGPs.csv", stringsAsFactors = FALSE)
notfound <- read.csv("../Data/NotFoundGPs.csv", stringsAsFactors = FALSE)

# Geocode the rows of NotFoundGPs.csv again and attach the new columns.
# Disabled variant: prefix the practice name to the address before the lookup.
# notfound$Address <- paste0(notfound$Name, ", ", notfound$Address)
nfgeo <- geocode(notfound$Address, output = "latlona") %>%
  rename("Calculated.Address" = address)
notfound <- cbind(notfound, nfgeo)
-
-
# Recombine the two partitions into one table.
# Disabled variant: pad `notfound` with empty geocoding columns instead.
# notfound <- cbind(notfound, lon=NA, lat=NA, Calculated.Address=NA)
alldhbs <- rbind(found, notfound)

# A row is "bad" when its calculated address does not contain "new zealand".
# grepl() returns FALSE for NA, so rows with no calculated address are bad too.
bad <- !grepl("new zealand", alldhbs$Calculated.Address)

# Split on that flag, then reuse `found` / `notfound` for the two halves.
goodcode <- alldhbs[!bad, , drop = FALSE]
badcode <- alldhbs[bad, , drop = FALSE]
found <- goodcode
notfound <- badcode
|