## The goal is to make boring and uninteresting be ok. ## This is surprisingly easy. When the data has lots structure (lots of information) exciting and interesting patterns emerge. ## When it is random the graphs look inherently less interesting. This is exactly what we want, residual should have not structure, they should be random. ## Lets assume you built or inherited a model that has a unit root but did not first differenced the variables. Lets complete ignore why this has occurred. ## ## The data is Composite Indexes of Leading, Coincident, and Lagging Economic Indicators for the US: 1960 to 2006 from InfoChimps.org ## The model's goal is to predict next year's coincident economic activity using this years leading indicators and hopefully creating colorful errors in the process. library(grid) require(graphics) ## Run a simple linear regression mdl.v1 <-lm(EMP ~ L1_GDP + L1_WAGES ,data = df.Mdl[1:SmpEnd, ]) summary(mdl.v1 ) ## Normaized residuals using within sample varience (this is the Y) v1.pred <- predict(mdl.v1 , df.Mdl[OutSmpStart:OutSmpEnd,]) v1.res <- v1.pred- df.Mdl[OutSmpStart:OutSmpEnd,]$EMP v1.var <- var(df.Mdl[1:SmpEnd, ]$EMP ) v1.NormRes <- v1.res/sqrt(v1.var ) ## Squared value adjusted by max squared value across entire sample(this is the radius) # Gives a sense of magnitude of the estimate v1.maxpred <- max(v1.pred^2) v1.predAdj <- v1.pred^2/v1.maxpred ## The ratio of the dependant over lagged dependant indicates color. ## It is good to put in measures testing if there has been a concept drift in the model since it was built. v1.tRateOfChange <- abs(df.Mdl[(OutSmpStart-1):(OutSmpEnd-1),]$EMP/ df.Mdl[OutSmpStart:(OutSmpEnd),]$EMP) v1.maxRateOfChange <- max(v1.tRateOfChange) v1.RateOfChange <- v1.tRateOfChange/v1.maxRateOfChange ## Create a new plot dev.new() plot.new() grid.newpage() ## Set the plot options op <- par(bg ="black", col="white", col.lab ="white" ,col.axis ="white" , col.main ="white" ,col.sub ="white" ) plot(c(1996, 2009), c(-40, 40), type = "n", xlab="Year", ylab="Normalized Residuals", main = "Emp = L1 GNP + L1Wages", sub="Color: Value Radius: Change in Res " ) abline( h=0, col = "white") palette(rainbow(200)) loops<- OutSmpEnd-OutSmpStart ## loop through the data i <- (1:loops) { ptx = 1996 + i/4 pty = v1.NormRes[i]*100 ptr = v1.predAdj[i]*2 ptcolor = 35 + v1.RateOfChange[i]*165 points(ptx,pty , pch = 19, col =ptcolor, bg =ptcolor ,cex= ptr) } ########## # There is structure in the residuals. This is bad. # First off the residuals are trending up indicating heteroscedasticity. Also note that large values (the size of the circles) have larger errors. # Secondly, the color is uniform and high on the color chart showing a strong correlation between the dependant and it's lagged value. # This indicates a unit root. Take the log of the variables to deal with the heteroscedasticity and correct for the unit root by taking first difference. ########## ## Run a simple linear regression mdl.v2 <-lm(EMP_log_1D ~ L1_GDP_log_1D + L1_WAGES_log_1D ,data = df.Mdl.2[1:SmpEnd, ]) summary(mdl.v2 ) ## Normaized residuals using within sample varience (this is the Y) v2.pred <- predict(mdl.v2 , df.Mdl.2[OutSmpStart:OutSmpEnd,]) v2.res <- v2.pred- df.Mdl.2[OutSmpStart:OutSmpEnd,]$EMP_log_1D v2.var <- var(df.Mdl.2[1:SmpEnd, ]$EMP_log_1D ) v2.NormRes <- v2.res/sqrt(v2.var ) ## Squared value adjusted by max squared value across entire sample(this is the radius) v2.maxpred <- max(v2.pred[1:49]^2) v2.predAdj <- v2.pred^2/v2.maxpred ## The ratio of the dependant over lagged dependant indicates color. v2.tRateOfChange <- abs(df.Mdl.2[(OutSmpStart-1):(OutSmpEnd-1),]$EMP_log_1D/ df.Mdl.2[OutSmpStart:(OutSmpEnd),]$EMP_log_1D) v2.maxRateOfChange <- max(v2.tRateOfChange[1:49]) v2.RateOfChange <- v2.tRateOfChange/v2.maxRateOfChange ## Create a new plot dev.new() plot.new() grid.newpage() ## Set the plot options op <- par(bg ="black", col="white", col.lab ="white" ,col.axis ="white" , col.main ="white" ,col.sub ="white" ) plot(c(1996, 2009), c(-100, 100), type = "n", xlab="Year", ylab="Normalized Residuals", main = "Log 1D Emp = Log 1D L1 GNP + Log 1D L1 Wages", sub="Color: Value Radius: Change in Res " ) abline( h=0, col = "white") palette(rainbow(200)) loops<- OutSmpEnd-OutSmpStart-1 ## Loop through the data i <- (1:loops) { ptx = 1996+ i/4 pty = v2.NormRes[i]*100 ptr = v2.predAdj[i]*5 ptcolor = 35+ v2.RateOfChange[i]*165 points(ptx,pty , pch = 19, col =ptcolor, bg =ptcolor ,cex= ptr) } ########### Note on color ## Why did I only use the range 24-200 from the rainbow palette? Easier to see. dev.new() plot.new() grid.newpage() palette(rainbow(200)) i <- (1:200) { ptx=.005*i pty=.005*i grid.circle(x=ptx, y=pty, r=.01, default.units="npc", name=NULL ,gp=gpar(col="white",fill= i ), draw=TRUE, vp=NULL) } palette(rainbow(200)) i <- (35:200) { ptx=.005*i pty=.1 + .005*i grid.circle(x=ptx, y=pty, r=.01, default.units="npc", name=NULL ,gp=gpar(col="white",fill= i ), draw=TRUE, vp=NULL) } ################# ##END OF FILE #################