|
@@ -0,0 +1,76 @@
|
|
1
|
+#! /usr/bin/env Rscript
|
|
2
|
+# Original 2017 geocoding script
|
|
3
|
+
|
|
4
|
+library(dplyr)
|
|
5
|
+library(ggmap)
|
|
6
|
+
|
|
7
|
# Load the facilities extract and keep only the GP practices.
allmed <- read.csv("../Data/facilities20170901.csv", stringsAsFactors = FALSE)
gppractice <- filter(allmed, Facility.Type.Name == "GP Practice")

# Rows whose DHB.Name is the literal string "NULL" are relabelled "Missing".
# %in% never yields NA, so an NA in DHB.Name cannot break the subscripted
# assignment the way a bare `==` comparison would.
gppractice[gppractice$DHB.Name %in% "NULL", "DHB.Name"] <- "Missing"

# Distinct DHB names, in order of first appearance.
DHBs <- unique(gppractice$DHB.Name)

# Position(s) within DHBs selected for geocoding on this run.
DHBsToGet <- 13
|
|
13
|
+
|
|
14
|
+# Notes:
|
|
15
|
+# * Clean up missing values
|
|
16
|
+# * Seems to fail for Taranaki
|
|
17
|
+
|
|
18
|
# Geocode the GP practices of each selected DHB and save the result to
# ../Data/<DHB name>.csv, printing the DHB name and its practice count.
for (dhb in DHBs[DHBsToGet]) {
  print(dhb)
  gpdhb <- filter(gppractice, DHB.Name == dhb)
  print(nrow(gpdhb))
  # Pass `output` by name with `=`; using `<-` inside the call would assign a
  # global variable instead of naming the argument.
  geodhb <- geocode(gpdhb$Address, output = "latlona") %>%
    rename("Calculated.Address" = address)
  gpdhb <- cbind(gpdhb, geodhb)
  write.csv(gpdhb, file = paste0("../Data/", dhb, ".csv"), row.names = FALSE)
}
|
|
26
|
+
|
|
27
|
+# gphv <- filter(gppractice, DHB.Name == "Hutt Valley District Health Board")
|
|
28
|
+#
|
|
29
|
+# geocodes <- geocode(gphv$Address, output = "latlona")
|
|
30
|
+# geocodes <- rename(geocodes, "Calculated.Address" = address)
|
|
31
|
+# gphv <- cbind(gphv, geocodes)
|
|
32
|
+#
|
|
33
|
+# write.csv(gphv, file="../Data/HuttDHB.csv")
|
|
34
|
+
|
|
35
|
# Combine the per-DHB geocoded files into a single table and save it.
# sprintf() (unlike paste0()) returns a zero-length result for zero-length
# DHBs, so an empty DHB list reads no files.
dhb_files <- sprintf("../Data/%s.csv", DHBs)

# Fail up front with the full list of absent files rather than part-way
# through reading.
missing_files <- dhb_files[!file.exists(dhb_files)]
if (length(missing_files) > 0) {
  stop(
    "Missing per-DHB geocode file(s): ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Read every file first, then bind once, instead of growing the data frame
# with rbind() on each iteration.
alldhbs <- do.call(
  rbind,
  lapply(dhb_files, read.csv, stringsAsFactors = FALSE)
)

write.csv(alldhbs, file = "../Data/AllDHBsGeocoded.csv", row.names = FALSE)
|
|
42
|
+
|
|
43
|
# Reload the combined geocoding results from disk.
alldhbs <- read.csv("../Data/AllDHBsGeocoded.csv", stringsAsFactors = FALSE)

# Partition the rows on whether lon is present or missing.
found <- alldhbs %>% filter(!is.na(lon))
notfound <- alldhbs %>% filter(is.na(lon))

# Save both partitions; the not-found rows are written without the geocode
# columns (lat, lon, Calculated.Address).
write.csv(found, file = "../Data/FoundGPs.csv", row.names = FALSE)
notfound <- notfound[
  ,
  !(names(notfound) %in% c("lat", "lon", "Calculated.Address")),
  drop = FALSE
]
write.csv(notfound, file = "../Data/NotFoundGPs.csv", row.names = FALSE)
|
|
56
|
+
|
|
57
|
# Reload the two partitions that were written to disk.
found <- read.csv("../Data/FoundGPs.csv", stringsAsFactors = FALSE)
notfound <- read.csv("../Data/NotFoundGPs.csv", stringsAsFactors = FALSE)

#notfound$Address <- paste0(notfound$Name, ", ", notfound$Address)
# Geocode the not-found addresses again and attach the resulting columns,
# with the returned address column named Calculated.Address.
nfgeo <- geocode(notfound$Address, output = "latlona") %>%
  rename("Calculated.Address" = address)
notfound <- cbind(notfound, nfgeo)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
# Join found and not found
#notfound <- cbind(notfound, lon=NA, lat=NA, Calculated.Address=NA)
alldhbs <- rbind(found, notfound)

# A geocode counts as bad when its calculated address does not mention
# New Zealand. grepl() returns FALSE for NA, so rows with no calculated
# address are classed as bad too. The pattern is lowercase, presumably because
# geocode() returns lowercased addresses (confirm against the ggmap version in
# use); matching case-insensitively keeps the check valid either way.
bad <- !grepl("new zealand", alldhbs$Calculated.Address, ignore.case = TRUE)

# `bad` contains no NA, so plain logical row indexing is safe here.
badcode <- alldhbs[bad, , drop = FALSE]
goodcode <- alldhbs[!bad, , drop = FALSE]

# The bad geocodes become the new not-found set; the rest are found.
notfound <- badcode
found <- goodcode
|