Section 22 Code
The following custom functions were written to enable Data Enrichment:
Custom Functions
The Reverse Geocode function is shown below:
library(httr)
# Reverse geocode one row of a data-set with the Google Geocoding API.
#
# Input1 is the row in the data-set we want to reverse geocode
# Input2 is the original data-set; column 2 and column 3 of that row are
#        joined as "col2,col3" and sent as the `latlng` query parameter
# Input3 is the API access key
#
# Returns a vector of 7 address parts read from the first result, in this
# order: street number, route, locality, administrative area level 2,
# administrative area level 1, country, postal code.
# Parts that are not present in the response stay NA. When the response holds
# no results at all, the whole vector is NA.
ReverseGeo <- function(Input1, Input2, Input3) {
  # The position of a component type here is its position in the output
  slot_types <- c(
    "street_number",
    "route",
    "locality",
    "administrative_area_level_2",
    "administrative_area_level_1",
    "country",
    "postal_code"
  )
  address <- rep(NA, length(slot_types))

  lat_lng <- paste0(Input2[Input1, 2], ",", Input2[Input1, 3])

  # Use httr to query Google and parse the response
  response <- GET(
    "https://maps.googleapis.com/maps/api/geocode/json",
    query = list(latlng = lat_lng, key = Input3)
  )
  parsed <- content(response)

  # Nothing to read (no match, rejected key, unparsed body): hand back the
  # all-NA vector instead of failing on results[[1]]
  if (!is.list(parsed) || length(parsed[["results"]]) == 0) {
    return(address)
  }

  # Scan every component of the first result. The number of components is
  # unrelated to the 7 output slots, so capping the scan at 7 would drop
  # whatever wanted component happens to come later in the response.
  for (component in parsed[["results"]][[1]]$address_components) {
    long_name <- component$long_name
    if (length(component$types) == 0 || is.null(long_name)) {
      next
    }

    # Only the first listed type of a component decides its slot
    slot <- match(component$types[[1]], slot_types)
    if (is.na(slot)) {
      next
    }

    if (slot == 1L) {
      # Keep only the part of the street number before the first "-"
      long_name <- strsplit(long_name, "-")[[1]][1]
    }
    address[slot] <- long_name
  }

  # Return the parsed address details
  address
}
The Radar Search function is shown below:
#################
# Created by: Ed Anderson
# Date: 09/04/2017
##################
# Radar Search With Google API
# Function contacts the Google API and returns local area information
# Example Query:
#
# RadarSearch(1,data2[,c("Lat","Long")],100,"AIzaSyC5dvHAt1QbcWtdIaDWHnLLUaAARJSSSPs","School")
#
# A key value needs to be obtained from the URL below.
# Here is my key "AIzaSyC5dvHAt1QbcWtdIaDWHnLLUaAARJSSSPs"
# https://developers.google.com/maps/documentation/geocoding/get-api-key
## Load Packages
library(httr)
# Count the places of a given kind found around one property.
#
# Input1: integer, the row of Input2 to search around
# Input2: data-frame with the lat-long information of the properties;
#         column 1 and column 2 of the chosen row are joined as "col1,col2"
#         and sent as the search location
# Input3: numeric value used as the radius of the search
# Input4: the Google API key
# Input5: the Place of Interest (eg. Restaurants)
#
# Returns the number of entries in the `results` field of the parsed response.
RadarSearch <- function(Input1, Input2, Input3, Input4, Input5) {
  centre <- paste0(Input2[Input1, 1], ",", Input2[Input1, 2])

  # Query parameters, in the order they are sent
  search_params <- list(
    location = centre,
    radius = as.character(Input3),
    # NOTE(review): the Places API documentation appears to favour `type`
    # over `types` - confirm before changing the parameter name
    types = Input5,
    key = Input4
  )

  response <- GET(
    "https://maps.googleapis.com/maps/api/place/nearbysearch/json",
    query = search_params
  )

  # Output the number of search results in the response
  places <- content(response)$results
  length(places)
}
######################################
The Zillow Rental Estimate function is shown below:
# Flatten one parsed Zillow property record into a fixed set of named fields.
#
# property: the parsed record. Its row names identify the entries, and
#           property[[k]] is read as the value for row.names(property)[k].
#           NOTE(review): presumably the list-matrix obtained from parsing
#           the API's XML response - confirm against the caller.
#
# Returns a named list of 32 fields. A field whose data is absent from
# `property` holds the sentinel string "999", so the result always has the
# same names in the same order.
Zillowf <- function(property) {
  missing_value <- "999"

  # Swap NULL for the sentinel. Assigning NULL to a list element deletes the
  # element, which would silently change the shape of the result.
  or_missing <- function(value) {
    if (is.null(value)) missing_value else value
  }

  # Named child of a list node; NULL when the node is not a list or has no
  # child of that name.
  child <- function(node, name) {
    if (is.list(node)) node[[name]] else NULL
  }

  # i-th element of a node; NULL when the node has fewer than i elements.
  nth <- function(node, i) {
    if (length(node) >= i) node[[i]] else NULL
  }

  # The fields that the zestimate and rentzestimate entries have in common.
  estimate_details <- function(node) {
    value_change <- child(node, "valueChange")
    value_range <- child(node, "valuationRange")
    list(
      LastUpdated = or_missing(child(node, "last-updated")),
      OneWeekChange = or_missing(nth(child(node, "oneWeekChange"), 1)),
      ValueChange = or_missing(child(value_change, "text")),
      ValueChangeDuration = or_missing(nth(child(value_change, ".attrs"), 1)),
      LowValueRange = or_missing(nth(value_range, 1)),
      HighValueRange = or_missing(nth(value_range, 3))
    )
  }

  # Create a local variable to store the search results; every field starts
  # out as the sentinel
  output_fields <- c(
    "zpid",
    "AddressStreet", "AddressZipCode", "AddressCity", "AddressState",
    "AddressLatitude", "AddressLongitude",
    "UseCode", "YearBuilt", "LotSizeSqFt", "FinishedSizeSqFt",
    "BathroomNo", "BedroomNo",
    "ZestimateAmount", "ZestimateLastUpdated", "ZestimateOneWeekChange",
    "ZestimateValueChange", "ZestimateValueChangeDuration",
    "ZestimateLowValueRange", "ZestimateHighValueRange",
    "ZestimateValuePercentile",
    "RZestimateAmount", "RZestimateLastUpdated", "RZestimateOneWeekChange",
    "RZestimateValueChange", "RZestimateValueChangeDuration",
    "RZestimateLowValueRange", "RZestimateHighValueRange",
    "RegionIndexValue", "RegionAttributeName",
    "RegionAttributeNeighbourhood", "RegionAttributeID"
  )
  Output <- as.list(rep(missing_value, length(output_fields)))
  names(Output) <- output_fields

  # Output field -> name of the child it is read from in the address entry
  address_children <- c(
    AddressStreet = "street",
    AddressZipCode = "zipcode",
    AddressCity = "city",
    AddressState = "state",
    AddressLatitude = "latitude",
    AddressLongitude = "longitude"
  )

  # Entries that are read from `property`. "localRealEstate" is left out on
  # purpose: its Region* fields are not extracted and keep the sentinel.
  handled <- c("zpid", "address", "useCode", "finishedSqFt", "bathrooms",
               "bedrooms", "zestimate", "rentzestimate")

  # Loop through the list of results and place the data in appropriate place
  entry_names <- row.names(property)
  for (k in seq_along(entry_names)) {
    entry_name <- entry_names[k]
    if (!(entry_name %in% handled)) {
      next
    }
    node <- property[[k]]

    switch(entry_name,
      zpid = {
        Output$zpid <- or_missing(node)
      },
      address = {
        for (field in names(address_children)) {
          Output[[field]] <- or_missing(child(node, address_children[[field]]))
        }
      },
      useCode = {
        Output$UseCode <- or_missing(node)
      },
      finishedSqFt = {
        Output$FinishedSizeSqFt <- or_missing(node)
      },
      bathrooms = {
        Output$BathroomNo <- or_missing(node)
      },
      bedrooms = {
        Output$BedroomNo <- or_missing(node)
      },
      zestimate = {
        amount <- child(node, "amount")
        # An amount of length 1 is treated as having no value text
        # (presumably it then holds only its attributes - verify)
        Output$ZestimateAmount <- if (length(amount) != 1) {
          or_missing(child(amount, "text"))
        } else {
          missing_value
        }
        details <- estimate_details(node)
        Output[paste0("Zestimate", names(details))] <- details
        Output$ZestimateValuePercentile <-
          or_missing(nth(child(node, "percentile"), 1))
      },
      rentzestimate = {
        Output$RZestimateAmount <-
          or_missing(child(child(node, "amount"), "text"))
        details <- estimate_details(node)
        Output[paste0("RZestimate", names(details))] <- details
      }
    )
  }

  Output
}