Browse Source

add old script

Petra Lamborn 4 years ago
parent
commit
36e42e4ed4
2 changed files with 79 additions and 0 deletions
  1. 76
    0
      geocodeGPs.R
  2. 3
    0
      scratch.R

+ 76
- 0
geocodeGPs.R View File

@@ -0,0 +1,76 @@
1
+#! /usr/bin/env Rscript
2
+# Original 2017 geocoding script
3
+
4
+library(dplyr)
5
+library(ggmap)
6
+
7
+allmed <- read.csv("../Data/facilities20170901.csv", stringsAsFactors=FALSE)
8
+
9
+gppractice <- filter(allmed, Facility.Type.Name == "GP Practice")
10
+gppractice[gppractice$DHB.Name == "NULL", "DHB.Name"] <- "Missing"
11
+DHBs <- unique(gppractice$DHB.Name)
12
+DHBsToGet <- 13
13
+
14
+# Notes:
15
+# * Clean up missing values
16
+# * Seems to fail for Taranaki
17
+
18
+for (dhb in DHBs[DHBsToGet]) {
19
+	print(dhb)
20
+	gpdhb <- filter(gppractice, DHB.Name == dhb)
21
+	print(nrow(gpdhb))
22
+	geodhb <- geocode(gpdhb$Address, output = "latlona") %>% rename("Calculated.Address" = address)
23
+	gpdhb <- cbind(gpdhb, geodhb)
24
+	write.csv(gpdhb, file = paste0("../Data/", dhb, ".csv"), row.names=FALSE)
25
+}
26
+
27
+# gphv <- filter(gppractice, DHB.Name == "Hutt Valley District Health Board")
28
+# 
29
+# geocodes <- geocode(gphv$Address, output = "latlona")
30
+# geocodes <- rename(geocodes, "Calculated.Address" = address)
31
+# gphv <- cbind(gphv, geocodes)
32
+# 
33
+# write.csv(gphv, file="../Data/HuttDHB.csv")
34
+
35
+# Combine the per-DHB geocoded CSVs into one data frame.
36
+# Read every file first and bind once, rather than growing with rbind() in a loop.
37
+alldhbs <- do.call(rbind, lapply(DHBs, function(dhb) {
38
+	read.csv(paste0("../Data/", dhb, ".csv"), stringsAsFactors=FALSE)
39
+}))
40
+
41
+write.csv(alldhbs, file="../Data/AllDHBsGeocoded.csv", row.names=FALSE)
42
+
43
+# Read in allDHB data
44
+alldhbs <- read.csv("../Data/AllDHBsGeocoded.csv", stringsAsFactors=FALSE)
45
+
46
+# Calc found and notfound
47
+found <- filter(alldhbs, !is.na(lon))
48
+notfound <- filter(alldhbs, is.na(lon))
49
+
50
+# Write found and not found
51
+write.csv(found, file="../Data/FoundGPs.csv", row.names=FALSE)
52
+notfound$lat <- NULL
53
+notfound$lon <- NULL
54
+notfound$Calculated.Address <- NULL
55
+write.csv(notfound, file="../Data/NotFoundGPs.csv", row.names=FALSE)
56
+
57
+# Read found and not found
58
+found <- read.csv("../Data/FoundGPs.csv", stringsAsFactors=FALSE)
59
+notfound <- read.csv("../Data/NotFoundGPs.csv", stringsAsFactors=FALSE)
60
+
61
+#notfound$Address <- paste0(notfound$Name, ", ", notfound$Address)
62
+nfgeo <- geocode(notfound$Address, output = "latlona")
63
+nfgeo <- rename(nfgeo, "Calculated.Address" = address)
64
+notfound <- cbind(notfound, nfgeo)
65
+
66
+
67
+# Join found and not found
68
+#notfound <- cbind(notfound, lon=NA, lat=NA, Calculated.Address=NA)
69
+alldhbs <- rbind(found, notfound)
70
+
71
+bad <- !grepl("new zealand", alldhbs$Calculated.Address)
72
+
73
+badcode <- subset(alldhbs, bad )
74
+goodcode <- subset(alldhbs, (!bad))
75
+notfound <- badcode
76
+found <- goodcode

+ 3
- 0
scratch.R View File

@@ -38,3 +38,6 @@ colSums(is.na(gp_prac))
38 38
 gp_same <- filter(gp_prac, !is.na(gp_prac$calculated_address))
39 39
 gp_new <- filter(gp_prac, is.na(gp_prac$calculated_address)) %>%
40 40
     select(-lon, -lat, -calculated_address)
41
+
42
+table(newFacil$facility_type_name)
43
+table(Geo2017$facility_type_name)