It’s difficult to grasp what is happening to the entire S&P 500 all at once. A friend of mine who was a stellar technical analyst came up with an idea to view the S&P 500 as a packed array of boxes, organized by sector and with each stocks ‘box’ area scaled to market cap. Then each box was colour-coded by return for the month. The net result was something like this:
I recreated that image for the monthly economics report using the following code. The boxChartHelper function takes a data.frame holding the stocks and returns another data.frame with the co-ordinates for each area. The data frame should have a column named “size” which is used to scale the boxes. The plotBoxChart function takes a data.frame of stocks (with columns specifying their ticker, sector, size or market cap, and the colour you want the box) and plots it using base graphics.
I know: (1) Hadley Wickham could do it in 2 lines with ggplot2, and (2) this isn’t a box plot so the naming is terrible. I’m just putting it here for reference because some people wanted to know how I make the chart – hint: not in Excel.
boxChartHelper<-function(data, # data.frame holding data
left, right, bottom, top) {
if(is.na(sum(data$size))) browser()
# calc values
width = right - left
height = top - bottom
aspect = width/height # asp > 1, width bigger than height
# if there is only one row, set and leave
if (nrow(data)==1) {
data$left = left
data$right = right
data$left = left
data$top = top
data$bottom = bottom
return(data)
}
# if two rows, split by width
if (nrow(data)==2) {
if (aspect >= 1) { # split the width
data$top = top
data$bottom = bottom
width.1 = width*(data$size[1]/sum(data$size))
data$left = c(left, left+width.1)
data$right = c(left+width.1, right)
return(data)
} else { # split the height
data$left = left
data$right = right
height.1 = height*(data$size[1]/sum(data$size))
data$bottom = c(bottom, bottom + height.1)
data$top = c(bottom + height.1, top)
return(data)
}
}
# else, cut into two and recurse
data<-data[order(data$size, decreasing=TRUE),]
splitter <- c(TRUE, rep(FALSE, nrow(data)-1))
for(i in 2:nrow(data)) {
if(sum(data$size[splitter])/sum(data$size) > 0.5) break
splitter[i] <- TRUE
}
if (aspect >= 1) { # split the width
width.1 <- width*sum(data$size[splitter])/sum(data$size)
return(rbind(boxChartHelper(data[splitter,], left, left+width.1, bottom, top),
boxChartHelper(data[!splitter,], left+width.1, right, bottom, top)))
} else { # split the height
height.1 <- height*sum(data$size[splitter])/sum(data$size)
return(rbind(boxChartHelper(data[splitter,], left, right, bottom, bottom+height.1),
boxChartHelper(data[!splitter,], left, right, bottom+height.1, top)))
}
}
plotBoxChart<-function(data,
sec.col="Sector",
size.col="MarketCap",
change.col="Change",
colour.col="colour",
title=NULL,
bottom.space=4,
show.box.labels=FALSE) {
# make sure the data is okay
if (any(is.na(data[[sec.col]]))) stop("Error: NA Sectors")
if (any(is.na(data[[size.col]]))) stop("Error: NA Sizes")
if(!is.null(title)) title.space = 2 else title.space=0.25
par(mar=c(bottom.space,0.25,title.space,0.25), cex=1)
aspect=dev.size()[1]/dev.size()[2]
plot.new()
plot.window(xlim=c(0,100)*aspect, ylim=c(0,100), xaxs="i", yaxs="i")
if(!is.null(title)) title(main=title)
bounds <- c(par("usr"))
sec.data<-tapply(data[[size.col]], data[[sec.col]], sum)
sec.data<-as.data.frame(sec.data)
sec.data$sec <- rownames(sec.data)
names(sec.data) <- c("size", "section")
sec.data$change<-0
sec.data$left<-0
sec.data$right<-0
sec.data$top<-0
sec.data$bottom<-0
sec.data<-boxChartHelper(sec.data, bounds[1], bounds[2], bounds[3], bounds[4])
# map and plot individual stocks
for (i in 1:nrow(sec.data)) {
sec.name = sec.data$section[i]
stock.list <- data[data$Sector==sec.name,]
stock.list$left<-0
stock.list$right<-0
stock.list$top<-0
stock.list$bottom<-0
stock.list$size <- stock.list[[size.col]]
stock.list <- boxChartHelper(stock.list, sec.data$left[i],
sec.data$right[i],
sec.data$bottom[i],
sec.data$top[i])
sec.data$change[i] <- sum(stock.list[change.col] * stock.list$size)/
sum(stock.list$size)
# plot the results
for (j in 1:nrow(stock.list)) {
# draw the filled rectangle
rect(xleft=stock.list$left[j],
ybottom=stock.list$bottom[j],
xright=stock.list$right[j],
ytop=stock.list$top[j],
border="grey", lwd=0.25, col=stock.list$colour[j])
# draw the name of the stock
if(show.box.labels==TRUE) {
stock.text <- paste(stock.list$Ticker[j],"\n",format(stock.list[[change.col]][j], digits=2),"%",sep="")
stock.cex <- 0.75
box.width <- (stock.list$right[j] - stock.list$left[j])/2
box.height <- (stock.list$top[j] - stock.list$bottom[j])/2
# remove the ticker symbol if too small
if ((strheight(stock.text, cex=stock.cex) >= box.height * 0.98) ||
(strwidth(stock.text, cex=stock.cex) >= box.width * 0.98)) {
stock.text <- stock.list$Ticker[j]
}
# try one level smaller
if(strheight(stock.text,cex=stock.cex) >= box.height * 0.98) stock.cex <- 0.5
if(strwidth(stock.text,cex=stock.cex) >= box.width * 0.98) stock.cex <- 0.5
# draw if it fits
if ((strheight(stock.text, cex=stock.cex) < box.height * 0.99) &&
(strwidth(stock.text, cex=stock.cex) < box.width * 0.99)) {
text(x=mean(c(stock.list$left[j], stock.list$right[j])),
y=mean(c(stock.list$top[j], stock.list$bottom[j])) - strheight(stock.text, cex=stock.cex)/2,
labels=stock.text,
cex=stock.cex,
pos=3,
offset=0)
}
}
}
}
# plot the sections
for (i in 1:nrow(sec.data)) {
sec.width <- sec.data$right[i] - sec.data$left[i]
rect(xleft=sec.data$left[i],
ybottom=sec.data$bottom[i],
xright=sec.data$right[i],
ytop=sec.data$top[i],
border="black", lwd=3)
if (strwidth(sec.data$section[i], cex=1.5) > sec.width) {
label <- gsub(" ", "\n", sec.data$section[i])
} else {
label <- sec.data$section[i]
}
text(x=(sec.data$left[i]+sec.data$right[i])/2,
y=(sec.data$bottom[i]+sec.data$top[i])/2+strheight(label)/2,
labels=label, pos=1, offset=0, cex=1.5)
}
invisible(sec.data)
}

