3 files changed, 123 insertions, 114 deletions
diff --git a/IndexChange_500.csv b/IndexChange_500.csv
index 49b28b3..dbdb143 100644
--- a/IndexChange_500.csv
+++ b/IndexChange_500.csv
@@ -1,4 +1,4 @@
-Company Additions	Ticker	Company Deletions	Ticker	Date
+Company Additions	Ticker.add	Company Deletions	Ticker.del	Date
 	MOS		NSM	09/23/11
 	ACN		MIR	07/05/11
 	MPCwi		RSH	06/30/11
diff --git a/bandit.R b/bandit.R
index 9113ed3..0d35758 100644
--- a/bandit.R
+++ b/bandit.R
@@ -1,28 +1,29 @@
 library(TTR)
-tickers<- read.table("sp500 tickers 10-04.csv",sep="\t",header=T,quote="")
-colnames(tickers) <- c("name","ticker")
-#remove trailing whitespaces
-tickers[,"ticker"] <- sub(' +$', '',tickers[,"ticker"])
+#use the getYahooData defined in getYahooData.R (carries a local fix); sourced after library(TTR)
+source("getYahooData.R")
+sp500<- read.table("sp500 tickers 10-04.csv",sep="\t",header=TRUE,quote="",colClasses="character",strip.white=TRUE)
+colnames(sp500) <- c("name","ticker")
 #replace / by -
-tickers[,"ticker"] <- sub('/', '-',tickers[,"ticker"])
+sp500$ticker <- sub('/', '-', sp500$ticker)
 
 #first don't take care about survivor bias
-for(i in 1:length(tickers[,"ticker"])){
-  ticker <- tickers[i,"ticker"]
-  assign(ticker,getYahooData(ticker,"20000101"))
+tickerlist <- list()  # one downloaded series per ticker, keyed by ticker symbol
+for(i in seq_along(sp500$ticker)){  # seq_along: no iteration at all when the ticker file is empty
+  ticker <- sp500[i,"ticker"]
+  tickerlist[[ticker]] <- getYahooData(ticker,"20000101")
 }
 
 #compute matrix of returns
-ticker <- tickers[1,"ticker"]
-returns <- diff(log(get(ticker)$Close))
-for(i in 2:length(tickers[,"ticker"])){
-  ticker <- tickers[i,"ticker"]
-  returns <- merge(returns,diff(log(get(ticker)$Close)))
+ticker <- sp500[1,"ticker"]
+returns <- diff(log(tickerlist[[ticker]]$Close))  # log returns of the first ticker seed the matrix
+for(i in seq_along(sp500[,"ticker"])[-1]){  # remaining tickers; 2:length() would count down (2,1) with a single ticker
+  ticker <- sp500[i,"ticker"]
+  returns <- merge(returns,diff(log(tickerlist[[ticker]]$Close)))  # one more column per ticker, aligned on date
 }
-colnames(returns) <- tickers[,"ticker"]
+colnames(returns) <- sp500[,"ticker"]
 returns <- returns[-1,]
 universe <- which(!is.na(returns[1,]))
-returns.subset <- returns[,universe]
+returns.subset <- returns[,universe]  # keep only tickers that have a return on the first row
 
 #constant weight rebalancing
 w <- rep(1/dim(returns.subset)[2],dim(returns.subset)[2])
@@ -48,102 +49,17 @@ for(day in 1:length(returns[,1])){
 
 indexchange <- read.table("IndexChange_500.csv",sep="\t",header=T,quote="")
 #cleanup tickers
-ticker.add <- indexchange$Ticker
-ticker.add <- sub('/', '-',ticker.add)
-ticker.add <- sub('.wi', '',ticker.add,fixed=T)
-ticker.add <- sub('wi', '',ticker.add)
-ticker.add <- sub('.', '-',ticker.add,fixed=T)
+#read the columns straight from indexchange; attach()/detach() is avoided so nothing is left on the search path
+Ticker.add <- sub('/', '-',indexchange$Ticker.add)
+Ticker.add <- sub('.wi', '',Ticker.add,fixed=TRUE)
+Ticker.add <- sub('wi', '',Ticker.add)
+Ticker.add <- sub('.', '-',Ticker.add,fixed=TRUE)
+Ticker.del <- sub('/', '-',indexchange$Ticker.del)
+Ticker.del <- sub('.wi', '',Ticker.del,fixed=TRUE)
+Ticker.del <- sub('wi', '',Ticker.del)
+Ticker.del <- sub('.', '-',Ticker.del,fixed=TRUE)
+Date <- as.Date(indexchange$Date,format="%m/%d/%y")  # dates in the file look like 09/23/11
+for( jour in as.list(Date)){  # as.list keeps each element a Date; for() over a Date vector yields bare numbers
+  # TODO: the loop body was left empty (and the brace unclosed) in the original; index changes for `jour` go here
+}
 
-getYahooData<-function (symbol, start, end, freq = "daily", type = "price", adjust = TRUE, quiet = FALSE) 
-{
-    if (missing(start)) {
-        beg <- as.POSIXlt("1900-01-01")
-    }
-    else {
-        beg <- as.POSIXlt(as.Date(as.character(start), "%Y%m%d"))
-    }
-    if (missing(end)) {
-        end <- as.POSIXlt(Sys.Date())
-    }
-    else {
-        end <- as.POSIXlt(as.Date(as.character(end), "%Y%m%d"))
-    }
-    if (beg > end) 
-        stop("Start date must be before end date.")
-    if (beg > as.POSIXlt(Sys.Date())) 
-        stop("Start date is after today's date.")
-    freq <- match.arg(freq, c("daily", "weekly", "monthly"))
-    type <- match.arg(type, c("price", "split"))
-    if (type == "price") {
-        freq.url <- substr(freq, 1, 1)
-    }
-    else {
-        freq.url <- "v"
-        if (freq != "daily" & !quiet) 
-            message("Only freq=\"daily\" data available for type=\"split\".\n", "Setting freq=\"daily\"...")
-    }
-    flush.console()
-    if (type == "price") {
-        if (adjust) {
-            if (freq == "daily") {
-                ohlc <- getYahooData(symbol, start, freq = "daily", type = "price", adjust = FALSE, quiet = TRUE)
-                divspl <- getYahooData(symbol, start, freq = "daily", type = "split", adjust = FALSE, quiet = TRUE)
-                ohlc <- merge(ohlc, divspl, all = TRUE)
-                if (NROW(divspl) != 0) {
-                  if(!all(is.na(ohlc[is.na(ohlc[,"Close"]),"Split"]))){
-                    cat("ugly WAT fix!\n")
-                    ohlc[162,"Close"] <- 83.75
-                  }
-                  adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"])
-                  s.ratio <- adj[, 1]
-                  d.ratio <- adj[, 2]
-                  cn <- colnames(ohlc)
-                  ohlc <- cbind(ohlc, ohlc[, "Close"])
-                  colnames(ohlc) <- c(cn, "Unadj.Close")
-                  ohlc[time(ohlc[, "Open"] * d.ratio * s.ratio), "Open"] <- ohlc[, "Open"] * d.ratio * s.ratio
-                  ohlc[time(ohlc[, "High"] * d.ratio * s.ratio), "High"] <- ohlc[, "High"] * d.ratio * s.ratio
-                  ohlc[time(ohlc[, "Low"] * d.ratio * s.ratio), "Low"] <- ohlc[, "Low"] * d.ratio * s.ratio
-                  ohlc[time(ohlc[, "Close"] * d.ratio * s.ratio), "Close"] <- ohlc[, "Close"] * d.ratio * s.ratio
-                  ohlc[time(ohlc[, "Volume"] * (1/d.ratio)), "Volume"] <- ohlc[, "Volume"] * (1/d.ratio)
-                  ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume", "Unadj.Close", "Div", "Split", "Adj.Div")]
-                }
-            }
-            else stop("Only freq=\"daily\" adjusted data is currently supported.")
-        }
-        else {
-            url <- paste("http://ichart.finance.yahoo.com/table.csv?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&ignore=.csv",sep = "")
-            ohlc <- read.table(url, header = TRUE, sep = ",")
-            ohlc[, "Adj.Close"] <- NULL
-            ohlc <- ohlc[order(ohlc[, "Date"]), ]
-            ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
-        }
-    }
-    else {
-        if (!quiet) 
-            message("Unadjusted and adjusted dividend data are always returned.")
-        url <- paste("http://ichart.finance.yahoo.com/x?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&y=0&z=30000", sep = "")
-        ohlc <- read.table(url, skip = 1, sep = ",", fill = TRUE, as.is = TRUE)
-        div <- data.frame(Date = ohlc[ohlc[, "V1"] == "DIVIDEND", "V2"], Adj.Div = as.numeric(ohlc[ohlc[, "V1"] == "DIVIDEND", "V3"]), stringsAsFactors = FALSE)
-        spl <- data.frame(Date = ohlc[ohlc[, "V1"] == "SPLIT", "V2"], Split = as.character(ohlc[ohlc[, "V1"] == "SPLIT", "V3"]), stringsAsFactors = FALSE)
-        ohlc <- merge(div, spl, by.col = "Date", all = TRUE)
-        if (NROW(ohlc) == 0) 
-            return(ohlc)
-        ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d")
-        ohlc[, "Split"] <- sub(":", "/", ohlc[, "Split"])
-        ohlc[, "Split"] <- 1/sapply(parse(text = ohlc[, "Split"]), eval)
-        ohlc <- ohlc[order(ohlc[, 1]), ]
-        ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
-        if (all(is.na(ohlc[, "Split"]))) {
-            s.ratio <- rep(1, NROW(ohlc))
-        }
-        else {
-            s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1]
-        }
-        ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio))
-        colnames(ohlc)[3] <- "Div"
-        ohlc[, "Split"] <- as.numeric(ohlc[, "Split"])
-        ohlc <- ohlc[, c("Div", "Split", "Adj.Div")]
-    }
-    ohlc <- ohlc[paste(beg, end, sep = "/"), ]
-    return(ohlc)
-}
diff --git a/getYahooData.R b/getYahooData.R
new file mode 100644
index 0000000..a758ae1
--- /dev/null
+++ b/getYahooData.R
@@ -0,0 +1,93 @@
+getYahooData<-function (symbol, start, end, freq = "daily", type = "price", adjust = TRUE, quiet = FALSE)  # Download price (type="price") or dividend/split (type="split") history for `symbol` and return it as an xts object; start/end are parsed as "%Y%m%d"
+{
+    if (missing(start)) {
+        beg <- as.POSIXlt("1900-01-01")  # no start supplied: begin at 1900-01-01
+    }
+    else {
+        beg <- as.POSIXlt(as.Date(as.character(start), "%Y%m%d"))
+    }
+    if (missing(end)) {
+        end <- as.POSIXlt(Sys.Date())  # no end supplied: use today
+    }
+    else {
+        end <- as.POSIXlt(as.Date(as.character(end), "%Y%m%d"))
+    }
+    if (beg > end) 
+        stop("Start date must be before end date.")
+    if (beg > as.POSIXlt(Sys.Date())) 
+        stop("Start date is after today's date.")
+    freq <- match.arg(freq, c("daily", "weekly", "monthly"))  # errors on any other value
+    type <- match.arg(type, c("price", "split"))
+    if (type == "price") {
+        freq.url <- substr(freq, 1, 1)  # first letter of freq (d/w/m) is sent as the g= parameter
+    }
+    else {
+        freq.url <- "v"  # g= value used for the dividend/split request
+        if (freq != "daily" & !quiet)  # NOTE(review): the message announces a reset, but freq is not reassigned here; `&&` is the usual scalar form
+            message("Only freq=\"daily\" data available for type=\"split\".\n", "Setting freq=\"daily\"...")
+    }
+    flush.console()
+    if (type == "price") {
+        if (adjust) {
+            if (freq == "daily") {
+                ohlc <- getYahooData(symbol, start, freq = "daily", type = "price", adjust = FALSE, quiet = TRUE)  # recursive call: unadjusted prices (`end` is not forwarded; the final subset below applies it)
+                divspl <- getYahooData(symbol, start, freq = "daily", type = "split", adjust = FALSE, quiet = TRUE)  # recursive call: dividend/split events
+                ohlc <- merge(ohlc, divspl, all = TRUE)  # align prices and events, keeping every row of both
+                if (NROW(divspl) != 0) {
+                  if(!all(is.na(ohlc[is.na(ohlc[,"Close"]),"Split"]))){  # true when some row with a missing Close carries a Split value
+                    cat("ugly WAT fix!\n")
+                    ohlc[162,"Close"] <- 83.75  # NOTE(review): row 162 and 83.75 are hard-coded regardless of `symbol` - presumably a workaround for one ticker; confirm
+                  }
+                  adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"])
+                  s.ratio <- adj[, 1]  # first column is used as the split ratio
+                  d.ratio <- adj[, 2]  # second column is used as the dividend ratio
+                  cn <- colnames(ohlc)
+                  ohlc <- cbind(ohlc, ohlc[, "Close"])  # keep a copy of the close before it is adjusted
+                  colnames(ohlc) <- c(cn, "Unadj.Close")
+                  ohlc[time(ohlc[, "Open"] * d.ratio * s.ratio), "Open"] <- ohlc[, "Open"] * d.ratio * s.ratio  # prices are scaled by both ratios
+                  ohlc[time(ohlc[, "High"] * d.ratio * s.ratio), "High"] <- ohlc[, "High"] * d.ratio * s.ratio
+                  ohlc[time(ohlc[, "Low"] * d.ratio * s.ratio), "Low"] <- ohlc[, "Low"] * d.ratio * s.ratio
+                  ohlc[time(ohlc[, "Close"] * d.ratio * s.ratio), "Close"] <- ohlc[, "Close"] * d.ratio * s.ratio
+                  ohlc[time(ohlc[, "Volume"] * (1/d.ratio)), "Volume"] <- ohlc[, "Volume"] * (1/d.ratio)  # volume is scaled by 1/d.ratio
+                  ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume", "Unadj.Close", "Div", "Split", "Adj.Div")]  # fix the output column order
+                }
+            }
+            else stop("Only freq=\"daily\" adjusted data is currently supported.")
+        }
+        else {
+            url <- paste("http://ichart.finance.yahoo.com/table.csv?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&ignore=.csv",sep = "")  # price request built from the start/end date parts
+            ohlc <- read.table(url, header = TRUE, sep = ",")
+            ohlc[, "Adj.Close"] <- NULL  # drop the Adj.Close column
+            ohlc <- ohlc[order(ohlc[, "Date"]), ]  # sort rows by Date
+            ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))  # first column becomes the time index
+        }
+    }
+    else {
+        if (!quiet) 
+            message("Unadjusted and adjusted dividend data are always returned.")
+        url <- paste("http://ichart.finance.yahoo.com/x?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&y=0&z=30000", sep = "")  # dividend/split request
+        ohlc <- read.table(url, skip = 1, sep = ",", fill = TRUE, as.is = TRUE)  # skip the first line; fill=TRUE tolerates rows with fewer fields
+        div <- data.frame(Date = ohlc[ohlc[, "V1"] == "DIVIDEND", "V2"], Adj.Div = as.numeric(ohlc[ohlc[, "V1"] == "DIVIDEND", "V3"]), stringsAsFactors = FALSE)  # rows tagged DIVIDEND: V2 is the date, V3 the amount
+        spl <- data.frame(Date = ohlc[ohlc[, "V1"] == "SPLIT", "V2"], Split = as.character(ohlc[ohlc[, "V1"] == "SPLIT", "V3"]), stringsAsFactors = FALSE)  # rows tagged SPLIT: V2 is the date, V3 the ratio text
+        ohlc <- merge(div, spl, by.col = "Date", all = TRUE)  # NOTE(review): merge() has no `by.col` argument - presumably `by` was meant; confirm the join still keys on Date
+        if (NROW(ohlc) == 0) 
+            return(ohlc)  # no events: the empty merged data.frame is returned as-is, not an xts
+        ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d")
+        ohlc[, "Split"] <- sub(":", "/", ohlc[, "Split"])  # "a:b" becomes "a/b"
+        ohlc[, "Split"] <- 1/sapply(parse(text = ohlc[, "Split"]), eval)  # evaluate the fraction text and invert it; NOTE(review): eval(parse()) runs downloaded text as R code
+        ohlc <- ohlc[order(ohlc[, 1]), ]  # sort rows by Date
+        ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
+        if (all(is.na(ohlc[, "Split"]))) {
+            s.ratio <- rep(1, NROW(ohlc))  # no split recorded: neutral ratio of 1
+        }
+        else {
+            s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1]
+        }
+        ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio))  # append Adj.Div divided by the split ratio
+        colnames(ohlc)[3] <- "Div"  # name the appended third column
+        ohlc[, "Split"] <- as.numeric(ohlc[, "Split"])
+        ohlc <- ohlc[, c("Div", "Split", "Adj.Div")]
+    }
+    ohlc <- ohlc[paste(beg, end, sep = "/"), ]  # restrict the result to the beg/end window
+    return(ohlc)
+}