Visualizing the S&P 500

It’s difficult to grasp what is happening to the entire S&P 500 all at once. A friend of mine who was a stellar technical analyst came up with an idea to view the S&P 500 as a packed array of boxes, organized by sector and with each stocks ‘box’ area scaled to market cap. Then each box was colour-coded by return for the month. The net result was something like this:

sp500.april.box.chart

I recreated that image for the monthly economics report using the following code. The boxChartHelper function takes a data.frame holding the stocks and returns another data.frame with the co-ordinates for each area. The data frame should have a column named “size” which is used to scale the boxes. The plotBoxChart function takes a data.frame of stocks (with columns specifying their ticker, sector, size or market cap, and the colour you want the box) and plots it using base graphics.

I know: (1) Hadley Wickham could do it in 2 lines with ggplot2, and (2) this isn’t a box plot so the naming is terrible. I’m just putting it here for reference because some people wanted to know how I make the chart – hint: not in Excel.

boxChartHelper<-function(data, # data.frame holding data
                         left, right, bottom, top) {
  if(is.na(sum(data$size))) browser()
  
  # calc values
  width = right - left
  height = top - bottom
  aspect = width/height  # asp > 1, width bigger than height
  
  # if there is only one row, set and leave
  if (nrow(data)==1) {
    data$left = left
    data$right = right
    data$left = left
    data$top = top
    data$bottom = bottom
    return(data)
  }
  
  # if two rows, split by width
  if (nrow(data)==2) {
    if (aspect >= 1) { # split the width
      data$top = top
      data$bottom = bottom
      width.1 = width*(data$size[1]/sum(data$size))
      data$left = c(left, left+width.1)
      data$right = c(left+width.1, right)
      return(data)
    } else { # split the height
      data$left = left
      data$right = right
      height.1 = height*(data$size[1]/sum(data$size))
      data$bottom = c(bottom, bottom + height.1)
      data$top = c(bottom + height.1, top)
      return(data)
    }
  }
  
  # else, cut into two and recurse
  data<-data[order(data$size, decreasing=TRUE),]
  splitter <- c(TRUE, rep(FALSE, nrow(data)-1))
  for(i in 2:nrow(data)) {
    if(sum(data$size[splitter])/sum(data$size) > 0.5) break
    splitter[i] <- TRUE
  }
  
  if (aspect >= 1) { # split the width
    width.1 <- width*sum(data$size[splitter])/sum(data$size)
    return(rbind(boxChartHelper(data[splitter,], left, left+width.1, bottom, top),
                 boxChartHelper(data[!splitter,], left+width.1, right, bottom, top)))           
  } else { # split the height
    height.1 <- height*sum(data$size[splitter])/sum(data$size)
    return(rbind(boxChartHelper(data[splitter,], left, right, bottom, bottom+height.1),
                 boxChartHelper(data[!splitter,], left, right, bottom+height.1, top)))           
  }
}


plotBoxChart<-function(data, 
                       sec.col="Sector", 
                       size.col="MarketCap", 
                       change.col="Change",
                       colour.col="colour", 
                       title=NULL,
                       bottom.space=4,
                       show.box.labels=FALSE) {
  # make sure the data is okay
  if (any(is.na(data[[sec.col]]))) stop("Error: NA Sectors")
  if (any(is.na(data[[size.col]]))) stop("Error: NA Sizes")
  
  
  if(!is.null(title)) title.space = 2 else title.space=0.25
  par(mar=c(bottom.space,0.25,title.space,0.25), cex=1)
  aspect=dev.size()[1]/dev.size()[2]
  plot.new()
  plot.window(xlim=c(0,100)*aspect, ylim=c(0,100), xaxs="i", yaxs="i")
  if(!is.null(title)) title(main=title)
  
  bounds <- c(par("usr"))
  sec.data<-tapply(data[[size.col]], data[[sec.col]], sum)
  sec.data<-as.data.frame(sec.data)
  sec.data$sec <- rownames(sec.data)
  names(sec.data) <- c("size", "section")
  sec.data$change<-0
  sec.data$left<-0
  sec.data$right<-0
  sec.data$top<-0
  sec.data$bottom<-0
  
  sec.data<-boxChartHelper(sec.data, bounds[1], bounds[2], bounds[3], bounds[4])
  
  # map and plot individual stocks
  for (i in 1:nrow(sec.data)) {
    sec.name = sec.data$section[i]
    stock.list <- data[data$Sector==sec.name,]
    stock.list$left<-0
    stock.list$right<-0
    stock.list$top<-0
    stock.list$bottom<-0
    stock.list$size <- stock.list[[size.col]]
    
    stock.list <- boxChartHelper(stock.list, sec.data$left[i], 
                                 sec.data$right[i],
                                 sec.data$bottom[i],
                                 sec.data$top[i])
    
    sec.data$change[i] <- sum(stock.list[change.col] * stock.list$size)/
      sum(stock.list$size)
    
    # plot the results
    for (j in 1:nrow(stock.list)) {
      # draw the filled rectangle
      rect(xleft=stock.list$left[j],
           ybottom=stock.list$bottom[j],
           xright=stock.list$right[j],
           ytop=stock.list$top[j],
           border="grey", lwd=0.25, col=stock.list$colour[j])
      
      # draw the name of the stock
      if(show.box.labels==TRUE) {
        stock.text <- paste(stock.list$Ticker[j],"\n",format(stock.list[[change.col]][j], digits=2),"%",sep="")
        stock.cex <- 0.75
        box.width <- (stock.list$right[j] - stock.list$left[j])/2
        box.height <- (stock.list$top[j] - stock.list$bottom[j])/2
        
        # remove the ticker symbol if too small
        if ((strheight(stock.text, cex=stock.cex) >= box.height * 0.98) ||
              (strwidth(stock.text, cex=stock.cex) >= box.width * 0.98)) {
          stock.text <- stock.list$Ticker[j]
        }
        
        # try one level smaller
        if(strheight(stock.text,cex=stock.cex) >= box.height * 0.98) stock.cex <- 0.5
        if(strwidth(stock.text,cex=stock.cex) >= box.width * 0.98) stock.cex <- 0.5
        
        # draw if it fits
        if ((strheight(stock.text, cex=stock.cex) < box.height * 0.99) &&
              (strwidth(stock.text, cex=stock.cex) < box.width * 0.99)) {
          text(x=mean(c(stock.list$left[j], stock.list$right[j])),
               y=mean(c(stock.list$top[j], stock.list$bottom[j])) - strheight(stock.text, cex=stock.cex)/2,
               labels=stock.text,
               cex=stock.cex,
               pos=3,
               offset=0)
        }
      }
      
    }
  }
  
  # plot the sections
  for (i in 1:nrow(sec.data)) {
    sec.width <- sec.data$right[i] - sec.data$left[i]
    rect(xleft=sec.data$left[i],
         ybottom=sec.data$bottom[i],
         xright=sec.data$right[i],
         ytop=sec.data$top[i],
         border="black", lwd=3)
    if (strwidth(sec.data$section[i], cex=1.5) > sec.width) {
      label <- gsub(" ", "\n", sec.data$section[i])
    } else {
      label <- sec.data$section[i]
    }
    text(x=(sec.data$left[i]+sec.data$right[i])/2,
         y=(sec.data$bottom[i]+sec.data$top[i])/2+strheight(label)/2,
         labels=label, pos=1, offset=0, cex=1.5)
  }
  invisible(sec.data)
}