library(data.table)
library(ggplot2)
library(lubridate)
# ---- Load and prepare the input tables ----
# Both CSV files are read from `dataFolder`, relative to the working directory.
dataFolder <- "../input"
dtTrain <- fread(file.path(dataFolder, "train.csv"))
weather <- fread(file.path(dataFolder, "weather.csv"))

# Parse the observation dates and derive the calendar columns used for
# plotting and faceting.
dtTrain[, Date := as.Date(Date)]
dtTrain[, `:=`(year = year(Date), dayOfYear = yday(Date))]

# "M" in Tavg is treated as a missing-value marker. Convert it to a real NA
# rather than an in-band sentinel such as -1, which would be indistinguishable
# from a genuine reading of -1 and would cause that reading to be overwritten.
weather[Tavg == "M", Tavg := NA_character_]
weather[, `:=`(
  Date = as.Date(Date),
  Tmax = as.integer(Tmax),
  Tmin = as.integer(Tmin),
  Tavg = as.integer(Tavg)
)]
weather[, `:=`(year = year(Date), dayOfYear = yday(Date))]

# Fill missing averages with the truncated midpoint of the daily extremes.
weather[is.na(Tavg), Tavg := as.integer((Tmax + Tmin) / 2)]
# ---- Daily mosquito totals (log scale), one panel per year ----
# Group by Date together with its derived dayOfYear/year so that every date
# yields exactly one row. Listing those columns in `j` instead would return
# one row per input record (with the daily total recycled), producing
# duplicate rows and overplotting identical points.
mosquitosStats <- dtTrain[,
  .(NumMosquitos.sum = sum(NumMosquitos)),
  by = .(Date, dayOfYear, year)
]

# Point colour encodes the raw total; the y axis shows its natural log.
log_scale_mosquitos <- ggplot(mosquitosStats) +
  geom_point(aes(dayOfYear, log(NumMosquitos.sum), color = NumMosquitos.sum)) +
  facet_grid(year ~ .) +
  scale_color_gradient(low = "blue", high = "Red") +
  ggtitle("Total mosquitos by Day")
ggsave("log_scale_mosquitos.png", log_scale_mosquitos)
# ---- Daily mosquito totals against average temperature ----
# Total mosquito count per observation date.
mosquitosSum <- dtTrain[, .(NumMosquitos.sum = sum(NumMosquitos)), by = Date]

# Keep only station 1 readings from odd-numbered years
# (presumably the years present in train.csv -- verify against the data).
weatherTrain <- weather[Station == 1 & year %% 2 == 1, ]

# Left join: every retained weather day is kept; days with no matching
# mosquito total get a count of 0.
mosquitosByDate <- merge(
  x = weatherTrain,
  y = mosquitosSum,
  by = "Date",
  all.x = TRUE
)
mosquitosByDate[is.na(NumMosquitos.sum), NumMosquitos.sum := 0]

# Points show Tavg / 10 (presumably scaled so it shares an axis with the
# log totals); the line shows the log of the daily mosquito total.
# NOTE(review): log(0) is -Inf for days whose total is 0 -- confirm that the
# resulting rendering of the line on those days is intended.
mosquitos_temperature_plot <- ggplot(data = mosquitosByDate) +
  geom_point(aes(x = dayOfYear, y = Tavg / 10, color = Tavg)) +
  geom_line(aes(x = dayOfYear, y = log(NumMosquitos.sum)), color = "olivedrab") +
  facet_grid(year ~ .) +
  scale_color_gradient(low = "blue", high = "Red") +
  ggtitle("log(NumMosquitos.sum) and Tavg plot")
ggsave(
  filename = "mosquitos_temperature_plot.png",
  plot = mosquitos_temperature_plot
)
# This script has been released under the
# Apache 2.0 open source license.