library(TTR)

# ---- Load and clean the constituent list ------------------------------------
# Tab-separated file with two columns, renamed here to "name" and "ticker".
# TRUE/FALSE are spelled out everywhere in this script: the download loop
# below assign()s one global variable per ticker, so a ticker called "T" or
# "F" would overwrite the T/F shorthands and break every later `header = T`.
tickers <- read.table("sp500 tickers 10-04.csv", sep = "\t", header = TRUE,
                      quote = "")
colnames(tickers) <- c("name", "ticker")
# remove trailing whitespace
tickers[, "ticker"] <- sub(' +$', '', tickers[, "ticker"])
# replace / by -
tickers[, "ticker"] <- sub('/', '-', tickers[, "ticker"])

# ---- Download price history --------------------------------------------------
# First pass: survivor bias is deliberately ignored.
# Each series is stored in a global variable named after its ticker.
# NOTE(review): getYahooData is redefined at the bottom of this file, so on a
# fresh top-to-bottom run this loop resolves to whatever getYahooData is
# already on the search path (presumably the one from TTR) - confirm which
# version is intended here.
for (i in seq_along(tickers[, "ticker"])) {
  ticker <- tickers[i, "ticker"]
  assign(ticker, getYahooData(ticker, "20000101"))
}

# ---- Matrix of daily log returns ---------------------------------------------
# One column per ticker, merged on the time index.
ticker <- tickers[1, "ticker"]
returns <- diff(log(get(ticker)$Close))
# seq_along(...)[-1] is empty when there is a single ticker (2:1 would not be).
for (i in seq_along(tickers[, "ticker"])[-1]) {
  ticker <- tickers[i, "ticker"]
  returns <- merge(returns, diff(log(get(ticker)$Close)))
}
colnames(returns) <- tickers[, "ticker"]
# drop the first row (diff() leaves it NA)
returns <- returns[-1, ]

# Universe = tickers that already have a return on the first remaining day.
universe <- which(!is.na(returns[1, ]))
returns.subset <- returns[, universe]

n.assets <- dim(returns.subset)[2]
n.days <- NROW(returns.subset)

# ---- Constant-weight (daily rebalanced) portfolio ----------------------------
w <- rep(1 / n.assets, n.assets)
N <- 1000000
# Preallocate the equity curve: initial wealth plus one value per day.
W <- numeric(n.days + 1)
W[1] <- N
for (day in seq_len(n.days)) {
  # convert log returns to simple returns
  r <- as.vector(exp(returns.subset[day, ]) - 1)
  # NA means we don't know the return that day, but the next return will
  # catch up
  r[is.na(r)] <- 0
  dN <- N * drop(crossprod(w, r))
  N <- N + dN
  W[day + 1] <- N
}
# Keep this curve under its own name; W is reused by the next simulation.
W.rebalanced <- W

# ---- Buy-and-hold (no rebalancing) portfolio ---------------------------------
N <- 1000000
W <- numeric(n.days + 1)
W[1] <- N
# R holds the cumulative gross return of each asset.
R <- rep(1, n.assets)
for (day in seq_len(n.days)) {
  r <- as.vector(exp(returns.subset[day, ]))
  # unknown return: leave the cumulative return unchanged for that day
  r[is.na(r)] <- 1
  R <- R * r
  W[day + 1] <- N * mean(R)
}
W.buyhold <- W

# ---- Index membership changes ------------------------------------------------
indexchange <- read.table("IndexChange_500.csv", sep = "\t", header = TRUE,
                          quote = "")
# cleanup tickers
ticker.add <- indexchange$Ticker
ticker.add <- sub('/', '-', ticker.add)
ticker.add <- sub('.wi', '', ticker.add, fixed = TRUE)
ticker.add <- sub('wi', '', ticker.add)
ticker.add <- sub('.', '-', ticker.add, fixed = TRUE)

# Download price or split/dividend history for one symbol from Yahoo's
# ichart CSV endpoints and return it as an xts object.
#
# Arguments:
#   symbol  Ticker symbol, pasted verbatim into the request URL.
#   start   Start date in "%Y%m%d" form (e.g. "20000101" or 20000101);
#           1900-01-01 when missing.
#   end     End date in "%Y%m%d" form; today's date when missing.
#   freq    "daily", "weekly" or "monthly". Adjusted prices support only
#           "daily".
#   type    "price" for OHLC/volume data, "split" for split and dividend
#           data.
#   adjust  For type = "price": if TRUE, scale Open/High/Low/Close/Volume by
#           the ratios returned by adjRatios() and add Unadj.Close, Div,
#           Split and Adj.Div columns.
#   quiet   If TRUE, suppress informational messages.
#
# Returns an xts object restricted to the start/end window. For
# type = "split" with no dividend or split rows, the empty merged data.frame
# is returned instead.
#
# Stops if start is after end, if start is after today's date, or if
# adjusted data is requested with a non-daily freq.
getYahooData <- function(symbol, start, end, freq = "daily", type = "price",
                         adjust = TRUE, quiet = FALSE) {
  if (missing(start)) {
    beg <- as.POSIXlt("1900-01-01")
  } else {
    beg <- as.POSIXlt(as.Date(as.character(start), "%Y%m%d"))
  }
  if (missing(end)) {
    end <- as.POSIXlt(Sys.Date())
  } else {
    end <- as.POSIXlt(as.Date(as.character(end), "%Y%m%d"))
  }
  if (beg > end) {
    stop("Start date must be before end date.")
  }
  if (beg > as.POSIXlt(Sys.Date())) {
    stop("Start date is after today's date.")
  }

  freq <- match.arg(freq, c("daily", "weekly", "monthly"))
  type <- match.arg(type, c("price", "split"))

  # Value of the "g" URL parameter: first letter of freq for prices, "v" for
  # the split/dividend request.
  if (type == "price") {
    freq.url <- substr(freq, 1, 1)
  } else {
    freq.url <- "v"
    # scalar condition, so use the short-circuit operator
    if (freq != "daily" && !quiet) {
      message("Only freq=\"daily\" data available for type=\"split\".\n",
              "Setting freq=\"daily\"...")
    }
  }
  flush.console()

  if (type == "price") {
    if (adjust) {
      if (freq == "daily") {
        # Fetch raw prices and split/dividend events separately, then merge
        # them on the time index. `end` is not forwarded to these calls; the
        # result is trimmed to the requested window at the bottom.
        ohlc <- getYahooData(symbol, start, freq = "daily", type = "price",
                             adjust = FALSE, quiet = TRUE)
        divspl <- getYahooData(symbol, start, freq = "daily", type = "split",
                               adjust = FALSE, quiet = TRUE)
        ohlc <- merge(ohlc, divspl, all = TRUE)
        if (NROW(divspl) != 0) {
          # Triggered when a split is recorded on a row with no Close.
          # NOTE(review): the row index 162 and the price 83.75 are
          # hard-coded and applied to whatever symbol hits this branch; this
          # looks like a one-off workaround for a single ticker - confirm
          # and replace with a general fix.
          if (!all(is.na(ohlc[is.na(ohlc[, "Close"]), "Split"]))) {
            cat("ugly WAT fix!\n")
            ohlc[162, "Close"] <- 83.75
          }
          adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"])
          s.ratio <- adj[, 1]
          d.ratio <- adj[, 2]
          # Keep the unadjusted close as an extra column before rescaling.
          cn <- colnames(ohlc)
          ohlc <- cbind(ohlc, ohlc[, "Close"])
          colnames(ohlc) <- c(cn, "Unadj.Close")
          # Rescale each column in place, assigning by the time index of the
          # rescaled series.
          ohlc[time(ohlc[, "Open"] * d.ratio * s.ratio), "Open"] <-
            ohlc[, "Open"] * d.ratio * s.ratio
          ohlc[time(ohlc[, "High"] * d.ratio * s.ratio), "High"] <-
            ohlc[, "High"] * d.ratio * s.ratio
          ohlc[time(ohlc[, "Low"] * d.ratio * s.ratio), "Low"] <-
            ohlc[, "Low"] * d.ratio * s.ratio
          ohlc[time(ohlc[, "Close"] * d.ratio * s.ratio), "Close"] <-
            ohlc[, "Close"] * d.ratio * s.ratio
          ohlc[time(ohlc[, "Volume"] * (1/d.ratio)), "Volume"] <-
            ohlc[, "Volume"] * (1/d.ratio)
          ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume",
                           "Unadj.Close", "Div", "Split", "Adj.Div")]
        }
      } else {
        stop("Only freq=\"daily\" adjusted data is currently supported.")
      }
    } else {
      # Raw (unadjusted) price download.
      url <- paste("http://ichart.finance.yahoo.com/table.csv?s=", symbol,
                   "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900,
                   "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900,
                   "&g=", freq.url, "&ignore=.csv", sep = "")
      ohlc <- read.table(url, header = TRUE, sep = ",")
      ohlc[, "Adj.Close"] <- NULL
      # sort rows by date before building the xts object
      ohlc <- ohlc[order(ohlc[, "Date"]), ]
      ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
    }
  } else {
    if (!quiet) {
      message("Unadjusted and adjusted dividend data are always returned.")
    }
    url <- paste("http://ichart.finance.yahoo.com/x?s=", symbol,
                 "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900,
                 "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900,
                 "&g=", freq.url, "&y=0&z=30000", sep = "")
    ohlc <- read.table(url, skip = 1, sep = ",", fill = TRUE, as.is = TRUE)
    # Rows are tagged in the first column as DIVIDEND or SPLIT.
    is.div <- ohlc[, "V1"] == "DIVIDEND"
    is.spl <- ohlc[, "V1"] == "SPLIT"
    div <- data.frame(Date = ohlc[is.div, "V2"],
                      Adj.Div = as.numeric(ohlc[is.div, "V3"]),
                      stringsAsFactors = FALSE)
    spl <- data.frame(Date = ohlc[is.spl, "V2"],
                      Split = as.character(ohlc[is.spl, "V3"]),
                      stringsAsFactors = FALSE)
    # merge()'s key argument is `by`; the previous `by.col` was not a real
    # argument and only worked because "Date" is the sole shared column.
    ohlc <- merge(div, spl, by = "Date", all = TRUE)
    if (NROW(ohlc) == 0) {
      return(ohlc)
    }
    ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d")
    # Turn "a:b" into the expression "a/b" and store its reciprocal.
    # NOTE(review): eval(parse()) runs text downloaded from a remote server;
    # consider parsing the two numbers explicitly instead.
    ohlc[, "Split"] <- sub(":", "/", ohlc[, "Split"])
    ohlc[, "Split"] <- 1/sapply(parse(text = ohlc[, "Split"]), eval)
    ohlc <- ohlc[order(ohlc[, 1]), ]
    ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
    if (all(is.na(ohlc[, "Split"]))) {
      s.ratio <- rep(1, NROW(ohlc))
    } else {
      s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1]
    }
    # "Div" is Adj.Div divided by the split ratio.
    ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio))
    colnames(ohlc)[3] <- "Div"
    ohlc[, "Split"] <- as.numeric(ohlc[, "Split"])
    ohlc <- ohlc[, c("Div", "Split", "Adj.Div")]
  }

  # Trim to the requested window using an xts "from/to" range string.
  ohlc <- ohlc[paste(beg, end, sep = "/"), ]
  return(ohlc)
}