# ------------------------------------------------ # Read_EPC_csv.R # ------------------------------------------------ # Code provided as is and can be used or modified freely. # ------------------------------------------------ # Author: BIN CHI # UCL Centre for Advanced Spatial Analysis # bin.chi.16@ucl.ac.uk # Date: 15/11/2019 setwd("D:/EPC") #Domestic EPCs: to 31 August 2019 x1 <- list.files(path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, recursive = FALSE) folder <- paste("D:/EPC",x1,"certificates.csv",sep="/") library(data.table) epcdata = data.table::rbindlist(lapply(folder, data.table::fread, showProgress = FALSE)) dim(epcdata) #18575357 84 #epcdata[!duplicated(epcdata), ] # no duplicated records head(epcdata) epcdata[, LMK_KEY:=NULL] epcdata[!duplicated(epcdata), ] dim(epcdata) # 18575357 83 setDF(epcdata) epcdata$add1 <- toupper(epcdata$ADDRESS1) epcdata$add2 <- toupper(epcdata$ADDRESS2) epcdata$add3 <- toupper(epcdata$ADDRESS3) epcdata$add <- toupper(epcdata$ADDRESS) epcdata$id<- rownames(epcdata) library("RPostgreSQL") library("sqldf") library("dplyr") drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, dbname = "datajournal",port=5432, user="postgres",password=123456) dim(epcdata) #18575357 88 dbWriteTable(con, "data20190831id",value =epcdata, append = TRUE, row.names = FALSE) epc<-epcdata[,c("add1","add2","add3","add","POSTCODE","PROPERTY_TYPE","INSPECTION_DATE","LODGEMENT_DATE","TOTAL_FLOOR_AREA","NUMBER_HABITABLE_ROOMS","id")] dim(epc) #18575357 11 colnames(epc)<-c("address1","address2","address3","address","postcode","propertytype","inspectiondate","lodgementdate","tfarea","numberrooms","id") #18575357 11 dbWriteTable(con, "epc",value =epc, append = TRUE, row.names = FALSE)