diff options
| -rw-r--r-- | IndexChange_500.csv | 2 | ||||
| -rw-r--r-- | bandit.R | 142 | ||||
| -rw-r--r-- | getYahooData.R | 93 |
3 files changed, 123 insertions, 114 deletions
diff --git a/IndexChange_500.csv b/IndexChange_500.csv index 49b28b3..dbdb143 100644 --- a/IndexChange_500.csv +++ b/IndexChange_500.csv @@ -1,4 +1,4 @@ -Company Additions Ticker Company Deletions Ticker Date +Company Additions Ticker.add Company Deletions Ticker.del Date MOS NSM 09/23/11 ACN MIR 07/05/11 MPCwi RSH 06/30/11 @@ -1,28 +1,29 @@ library(TTR) -tickers<- read.table("sp500 tickers 10-04.csv",sep="\t",header=T,quote="") -colnames(tickers) <- c("name","ticker") -#remove trailing whitespaces -tickers[,"ticker"] <- sub(' +$', '',tickers[,"ticker"]) +#fix bug in getYahooData +source("getYahooData.R") +sp500<- read.table("sp500 tickers 10-04.csv",sep="\t",header=T,quote="",colClasses="character",strip.white=T) +colnames(sp500) <- c("name","ticker") #replace / by - -tickers[,"ticker"] <- sub('/', '-',tickers[,"ticker"]) +sp500$ticker <- sub('/', '-', sp500$ticker) #first don't take care about survivor bias -for(i in 1:length(tickers[,"ticker"])){ - ticker <- tickers[i,"ticker"] - assign(ticker,getYahooData(ticker,"20000101")) +tickerlist <- list() +for(i in 1:length(sp500$ticker)){ + ticker <- sp500[i,"ticker"] + tickerlist[[ticker]] <- getYahooData(ticker,"20000101") } #compute matrix of returns -ticker <- tickers[1,"ticker"] -returns <- diff(log(get(ticker)$Close)) -for(i in 2:length(tickers[,"ticker"])){ - ticker <- tickers[i,"ticker"] - returns <- merge(returns,diff(log(get(ticker)$Close))) +ticker <- sp500[1,"ticker"] +returns <- diff(log(tickerlist[[ticker]]$Close)) +for(i in 2:length(sp500[,"ticker"])){ + ticker <- sp500[i,"ticker"] + returns <- merge(returns,diff(log(tickerlist[[ticker]]$Close))) } -colnames(returns) <- tickers[,"ticker"] +colnames(returns) <- sp500[,"ticker"] returns <- returns[-1,] universe <- which(!is.na(returns[1,])) -returns.subset <- returns[,universe] +returns.subset <- returns[,universe] #constant weight rebalancing w <- rep(1/dim(returns.subset)[2],dim(returns.subset)[2]) @@ -48,102 +49,17 @@ for(day in 1:length(returns[,1])){ 
indexchange <- read.table("IndexChange_500.csv",sep="\t",header=T,quote="") #cleanup tickers -ticker.add <- indexchange$Ticker -ticker.add <- sub('/', '-',ticker.add) -ticker.add <- sub('.wi', '',ticker.add,fixed=T) -ticker.add <- sub('wi', '',ticker.add) -ticker.add <- sub('.', '-',ticker.add,fixed=T) +attach(indexchange) +Ticker.add <- sub('/', '-',Ticker.add) +Ticker.add <- sub('.wi', '',Ticker.add,fixed=T) +Ticker.add <- sub('wi', '',Ticker.add) +Ticker.add <- sub('.', '-',Ticker.add,fixed=T) +Ticker.del <- sub('/', '-',Ticker.del) +Ticker.del <- sub('.wi', '',Ticker.del,fixed=T) +Ticker.del <- sub('wi', '',Ticker.del) +Ticker.del <- sub('.', '-',Ticker.del,fixed=T) +Date <- as.Date(Date,format="%m/%d/%y") +for( jour in Date){ + +detach(indexchange) -getYahooData<-function (symbol, start, end, freq = "daily", type = "price", adjust = TRUE, quiet = FALSE) -{ - if (missing(start)) { - beg <- as.POSIXlt("1900-01-01") - } - else { - beg <- as.POSIXlt(as.Date(as.character(start), "%Y%m%d")) - } - if (missing(end)) { - end <- as.POSIXlt(Sys.Date()) - } - else { - end <- as.POSIXlt(as.Date(as.character(end), "%Y%m%d")) - } - if (beg > end) - stop("Start date must be before end date.") - if (beg > as.POSIXlt(Sys.Date())) - stop("Start date is after today's date.") - freq <- match.arg(freq, c("daily", "weekly", "monthly")) - type <- match.arg(type, c("price", "split")) - if (type == "price") { - freq.url <- substr(freq, 1, 1) - } - else { - freq.url <- "v" - if (freq != "daily" & !quiet) - message("Only freq=\"daily\" data available for type=\"split\".\n", "Setting freq=\"daily\"...") - } - flush.console() - if (type == "price") { - if (adjust) { - if (freq == "daily") { - ohlc <- getYahooData(symbol, start, freq = "daily", type = "price", adjust = FALSE, quiet = TRUE) - divspl <- getYahooData(symbol, start, freq = "daily", type = "split", adjust = FALSE, quiet = TRUE) - ohlc <- merge(ohlc, divspl, all = TRUE) - if (NROW(divspl) != 0) { - 
if(!all(is.na(ohlc[is.na(ohlc[,"Close"]),"Split"]))){ - cat("ugly WAT fix!\n") - ohlc[162,"Close"] <- 83.75 - } - adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"]) - s.ratio <- adj[, 1] - d.ratio <- adj[, 2] - cn <- colnames(ohlc) - ohlc <- cbind(ohlc, ohlc[, "Close"]) - colnames(ohlc) <- c(cn, "Unadj.Close") - ohlc[time(ohlc[, "Open"] * d.ratio * s.ratio), "Open"] <- ohlc[, "Open"] * d.ratio * s.ratio - ohlc[time(ohlc[, "High"] * d.ratio * s.ratio), "High"] <- ohlc[, "High"] * d.ratio * s.ratio - ohlc[time(ohlc[, "Low"] * d.ratio * s.ratio), "Low"] <- ohlc[, "Low"] * d.ratio * s.ratio - ohlc[time(ohlc[, "Close"] * d.ratio * s.ratio), "Close"] <- ohlc[, "Close"] * d.ratio * s.ratio - ohlc[time(ohlc[, "Volume"] * (1/d.ratio)), "Volume"] <- ohlc[, "Volume"] * (1/d.ratio) - ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume", "Unadj.Close", "Div", "Split", "Adj.Div")] - } - } - else stop("Only freq=\"daily\" adjusted data is currently supported.") - } - else { - url <- paste("http://ichart.finance.yahoo.com/table.csv?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&ignore=.csv",sep = "") - ohlc <- read.table(url, header = TRUE, sep = ",") - ohlc[, "Adj.Close"] <- NULL - ohlc <- ohlc[order(ohlc[, "Date"]), ] - ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1]))) - } - } - else { - if (!quiet) - message("Unadjusted and adjusted dividend data are always returned.") - url <- paste("http://ichart.finance.yahoo.com/x?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&y=0&z=30000", sep = "") - ohlc <- read.table(url, skip = 1, sep = ",", fill = TRUE, as.is = TRUE) - div <- data.frame(Date = ohlc[ohlc[, "V1"] == "DIVIDEND", "V2"], Adj.Div = as.numeric(ohlc[ohlc[, "V1"] == "DIVIDEND", "V3"]), stringsAsFactors = FALSE) - spl <- data.frame(Date = ohlc[ohlc[, 
"V1"] == "SPLIT", "V2"], Split = as.character(ohlc[ohlc[, "V1"] == "SPLIT", "V3"]), stringsAsFactors = FALSE) - ohlc <- merge(div, spl, by.col = "Date", all = TRUE) - if (NROW(ohlc) == 0) - return(ohlc) - ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d") - ohlc[, "Split"] <- sub(":", "/", ohlc[, "Split"]) - ohlc[, "Split"] <- 1/sapply(parse(text = ohlc[, "Split"]), eval) - ohlc <- ohlc[order(ohlc[, 1]), ] - ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1]))) - if (all(is.na(ohlc[, "Split"]))) { - s.ratio <- rep(1, NROW(ohlc)) - } - else { - s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1] - } - ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio)) - colnames(ohlc)[3] <- "Div" - ohlc[, "Split"] <- as.numeric(ohlc[, "Split"]) - ohlc <- ohlc[, c("Div", "Split", "Adj.Div")] - } - ohlc <- ohlc[paste(beg, end, sep = "/"), ] - return(ohlc) -} diff --git a/getYahooData.R b/getYahooData.R new file mode 100644 index 0000000..a758ae1 --- /dev/null +++ b/getYahooData.R @@ -0,0 +1,93 @@ +getYahooData<-function (symbol, start, end, freq = "daily", type = "price", adjust = TRUE, quiet = FALSE) +{ + if (missing(start)) { + beg <- as.POSIXlt("1900-01-01") + } + else { + beg <- as.POSIXlt(as.Date(as.character(start), "%Y%m%d")) + } + if (missing(end)) { + end <- as.POSIXlt(Sys.Date()) + } + else { + end <- as.POSIXlt(as.Date(as.character(end), "%Y%m%d")) + } + if (beg > end) + stop("Start date must be before end date.") + if (beg > as.POSIXlt(Sys.Date())) + stop("Start date is after today's date.") + freq <- match.arg(freq, c("daily", "weekly", "monthly")) + type <- match.arg(type, c("price", "split")) + if (type == "price") { + freq.url <- substr(freq, 1, 1) + } + else { + freq.url <- "v" + if (freq != "daily" & !quiet) + message("Only freq=\"daily\" data available for type=\"split\".\n", "Setting freq=\"daily\"...") + } + flush.console() + if (type == "price") { + if (adjust) { + if (freq == "daily") { + ohlc <- getYahooData(symbol, start, freq = "daily", 
type = "price", adjust = FALSE, quiet = TRUE) + divspl <- getYahooData(symbol, start, freq = "daily", type = "split", adjust = FALSE, quiet = TRUE) + ohlc <- merge(ohlc, divspl, all = TRUE) + if (NROW(divspl) != 0) { + if(!all(is.na(ohlc[is.na(ohlc[,"Close"]),"Split"]))){ + cat("ugly WAT fix!\n") + ohlc[162,"Close"] <- 83.75 + } + adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"]) + s.ratio <- adj[, 1] + d.ratio <- adj[, 2] + cn <- colnames(ohlc) + ohlc <- cbind(ohlc, ohlc[, "Close"]) + colnames(ohlc) <- c(cn, "Unadj.Close") + ohlc[time(ohlc[, "Open"] * d.ratio * s.ratio), "Open"] <- ohlc[, "Open"] * d.ratio * s.ratio + ohlc[time(ohlc[, "High"] * d.ratio * s.ratio), "High"] <- ohlc[, "High"] * d.ratio * s.ratio + ohlc[time(ohlc[, "Low"] * d.ratio * s.ratio), "Low"] <- ohlc[, "Low"] * d.ratio * s.ratio + ohlc[time(ohlc[, "Close"] * d.ratio * s.ratio), "Close"] <- ohlc[, "Close"] * d.ratio * s.ratio + ohlc[time(ohlc[, "Volume"] * (1/d.ratio)), "Volume"] <- ohlc[, "Volume"] * (1/d.ratio) + ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume", "Unadj.Close", "Div", "Split", "Adj.Div")] + } + } + else stop("Only freq=\"daily\" adjusted data is currently supported.") + } + else { + url <- paste("http://ichart.finance.yahoo.com/table.csv?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&ignore=.csv",sep = "") + ohlc <- read.table(url, header = TRUE, sep = ",") + ohlc[, "Adj.Close"] <- NULL + ohlc <- ohlc[order(ohlc[, "Date"]), ] + ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1]))) + } + } + else { + if (!quiet) + message("Unadjusted and adjusted dividend data are always returned.") + url <- paste("http://ichart.finance.yahoo.com/x?s=", symbol, "&a=", beg$mon, "&b=", beg$mday, "&c=", beg$year + 1900, "&d=", end$mon, "&e=", end$mday, "&f=", end$year + 1900, "&g=", freq.url, "&y=0&z=30000", sep = "") + ohlc <- read.table(url, skip = 1, sep = ",", fill 
= TRUE, as.is = TRUE) + div <- data.frame(Date = ohlc[ohlc[, "V1"] == "DIVIDEND", "V2"], Adj.Div = as.numeric(ohlc[ohlc[, "V1"] == "DIVIDEND", "V3"]), stringsAsFactors = FALSE) + spl <- data.frame(Date = ohlc[ohlc[, "V1"] == "SPLIT", "V2"], Split = as.character(ohlc[ohlc[, "V1"] == "SPLIT", "V3"]), stringsAsFactors = FALSE) + ohlc <- merge(div, spl, by.col = "Date", all = TRUE) + if (NROW(ohlc) == 0) + return(ohlc) + ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d") + ohlc[, "Split"] <- sub(":", "/", ohlc[, "Split"]) + ohlc[, "Split"] <- 1/sapply(parse(text = ohlc[, "Split"]), eval) + ohlc <- ohlc[order(ohlc[, 1]), ] + ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1]))) + if (all(is.na(ohlc[, "Split"]))) { + s.ratio <- rep(1, NROW(ohlc)) + } + else { + s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1] + } + ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio)) + colnames(ohlc)[3] <- "Div" + ohlc[, "Split"] <- as.numeric(ohlc[, "Split"]) + ohlc <- ohlc[, c("Div", "Split", "Adj.Div")] + } + ohlc <- ohlc[paste(beg, end, sep = "/"), ] + return(ohlc) +} |
