summaryrefslogtreecommitdiffstats
path: root/getYahooData.R
blob: a758ae1198851c498e2b91dc2b32dd570db40c67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#' Download price or dividend/split history for one symbol from Yahoo Finance
#'
#' Builds a request against the legacy "ichart.finance.yahoo.com" CSV
#' endpoints, reads the response and returns it as an xts object restricted to
#' the requested date range.
#' NOTE(review): these ichart endpoints are believed to have been retired by
#' Yahoo; confirm that the URLs below still respond before relying on this.
#'
#' @param symbol Ticker symbol; pasted verbatim into the request URL.
#' @param start First date wanted, in YYYYMMDD form (number or string).
#'   Defaults to 1900-01-01 when missing.
#' @param end Last date wanted, in YYYYMMDD form (number or string).
#'   Defaults to today when missing.
#' @param freq One of "daily", "weekly", "monthly". Adjusted prices and
#'   dividend/split data are only available for "daily".
#' @param type "price" for OHLC/volume data, "split" for dividends and splits.
#' @param adjust If TRUE (and type = "price"), prices are adjusted with the
#'   ratios returned by adjRatios() (not defined in this file; presumably
#'   TTR::adjRatios - confirm).
#' @param quiet If TRUE, informational messages are suppressed.
#' @return For type = "price": an xts object of prices; when adjusted and
#'   dividend/split data exist, its columns are Open, High, Low, Close,
#'   Volume, Unadj.Close, Div, Split, Adj.Div. For type = "split": an xts
#'   object with columns Div, Split, Adj.Div, or an empty data.frame when the
#'   symbol has no dividends or splits.
getYahooData <- function(symbol, start, end, freq = "daily", type = "price",
                         adjust = TRUE, quiet = FALSE) {
    # Turn a YYYYMMDD value into POSIXlt, failing with a readable message
    # instead of the later "missing value where TRUE/FALSE needed".
    parse_ymd <- function(value, arg_name) {
        parsed <- as.Date(as.character(value), "%Y%m%d")
        if (length(parsed) != 1L || is.na(parsed)) {
            stop("'", arg_name, "' must be a single date in YYYYMMDD format.",
                 call. = FALSE)
        }
        as.POSIXlt(parsed)
    }

    # Convert split ratios written as "a:b" (or "a/b", or a bare number) to
    # the numeric value a / b without evaluating downloaded text as R code.
    split_to_number <- function(ratios) {
        pieces <- strsplit(as.character(ratios), "[:/]")
        vapply(pieces, function(p) {
            p <- as.numeric(p)
            if (length(p) == 2L) {
                p[1L] / p[2L]
            } else if (length(p) == 1L) {
                p[1L]
            } else {
                NA_real_
            }
        }, numeric(1))
    }

    today <- as.POSIXlt(Sys.Date())
    beg <- if (missing(start)) {
        as.POSIXlt("1900-01-01")
    } else {
        parse_ymd(start, "start")
    }
    end_lt <- if (missing(end)) today else parse_ymd(end, "end")

    if (beg > end_lt)
        stop("Start date must be before end date.")
    if (beg > today)
        stop("Start date is after today's date.")

    freq <- match.arg(freq, c("daily", "weekly", "monthly"))
    type <- match.arg(type, c("price", "split"))

    if (type == "price") {
        freq.url <- substr(freq, 1, 1)
    } else {
        freq.url <- "v"
        if (freq != "daily") {
            if (!quiet)
                message("Only freq=\"daily\" data available for type=\"split\".\n", "Setting freq=\"daily\"...")
            # Make the code do what the message announces.
            freq <- "daily"
        }
    }
    flush.console()

    # Date-range part of the query string shared by both endpoints.
    date_query <- paste0("&a=", beg$mon, "&b=", beg$mday,
                         "&c=", beg$year + 1900,
                         "&d=", end_lt$mon, "&e=", end_lt$mday,
                         "&f=", end_lt$year + 1900,
                         "&g=", freq.url)

    if (type == "price") {
        if (adjust) {
            if (freq != "daily")
                stop("Only freq=\"daily\" adjusted data is currently supported.")

            # 'end' is deliberately not forwarded: prices and corporate
            # actions are fetched from 'start' up to today and only trimmed
            # to the requested range at the very end of this function.
            ohlc <- getYahooData(symbol, start, freq = "daily", type = "price",
                                 adjust = FALSE, quiet = TRUE)
            divspl <- getYahooData(symbol, start, freq = "daily", type = "split",
                                   adjust = FALSE, quiet = TRUE)
            ohlc <- merge(ohlc, divspl, all = TRUE)

            if (NROW(divspl) != 0) {
                # A split may be recorded on a date that has no price row,
                # leaving Close = NA next to a non-NA Split. Carry the most
                # recent earlier close forward onto those rows so the ratio
                # computation does not receive NA closes. (This generalises
                # a former hard-coded single-symbol patch of row 162.)
                close_vals <- as.numeric(ohlc[, "Close"])
                split_vals <- as.numeric(ohlc[, "Split"])
                orphan_rows <- which(is.na(close_vals) & !is.na(split_vals))
                if (length(orphan_rows) > 0L) {
                    if (!quiet)
                        message("Filling missing Close on ", length(orphan_rows),
                                " split date(s) with the previous close.")
                    for (row in orphan_rows) {
                        earlier <- which(!is.na(close_vals[seq_len(row - 1L)]))
                        if (length(earlier) > 0L) {
                            close_vals[row] <- close_vals[max(earlier)]
                            ohlc[row, "Close"] <- close_vals[row]
                        }
                    }
                }

                adj <- adjRatios(ohlc[, "Split"], ohlc[, "Div"], ohlc[, "Close"])
                s.ratio <- adj[, 1]
                d.ratio <- adj[, 2]

                # Keep the raw close before it is overwritten below.
                cn <- colnames(ohlc)
                ohlc <- cbind(ohlc, ohlc[, "Close"])
                colnames(ohlc) <- c(cn, "Unadj.Close")

                # Prices are scaled by both ratios; assignment goes through
                # the product's own time index so rows stay aligned.
                for (field in c("Open", "High", "Low", "Close")) {
                    scaled <- ohlc[, field] * d.ratio * s.ratio
                    ohlc[time(scaled), field] <- scaled
                }
                scaled_volume <- ohlc[, "Volume"] * (1/d.ratio)
                ohlc[time(scaled_volume), "Volume"] <- scaled_volume

                ohlc <- ohlc[, c("Open", "High", "Low", "Close", "Volume", "Unadj.Close", "Div", "Split", "Adj.Div")]
            }
        } else {
            url <- paste0("http://ichart.finance.yahoo.com/table.csv?s=", symbol,
                          date_query, "&ignore=.csv")
            ohlc <- read.table(url, header = TRUE, sep = ",")
            ohlc[, "Adj.Close"] <- NULL
            # Sort rows into ascending date order before building the xts.
            ohlc <- ohlc[order(ohlc[, "Date"]), ]
            ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))
        }
    } else {
        if (!quiet)
            message("Unadjusted and adjusted dividend data are always returned.")
        url <- paste0("http://ichart.finance.yahoo.com/x?s=", symbol,
                      date_query, "&y=0&z=30000")
        # Rows are tagged in column V1 as DIVIDEND or SPLIT, with the date in
        # V2 and the value in V3; the first line of the response is skipped.
        raw <- read.table(url, skip = 1, sep = ",", fill = TRUE, as.is = TRUE)
        is_div <- raw[, "V1"] == "DIVIDEND"
        is_spl <- raw[, "V1"] == "SPLIT"
        div <- data.frame(Date = raw[is_div, "V2"],
                          Adj.Div = as.numeric(raw[is_div, "V3"]),
                          stringsAsFactors = FALSE)
        spl <- data.frame(Date = raw[is_spl, "V2"],
                          Split = as.character(raw[is_spl, "V3"]),
                          stringsAsFactors = FALSE)
        ohlc <- merge(div, spl, by = "Date", all = TRUE)
        if (NROW(ohlc) == 0)
            return(ohlc)

        ohlc[, "Date"] <- as.Date(as.character(ohlc[, "Date"]), "%Y%m%d")
        # Stored value is the reciprocal of the quoted ratio ("2:1" -> 0.5).
        ohlc[, "Split"] <- 1/split_to_number(ohlc[, "Split"])
        ohlc <- ohlc[order(ohlc[, 1]), ]
        ohlc <- xts(ohlc[, -1], as.POSIXct(as.character(ohlc[, 1])))

        if (all(is.na(ohlc[, "Split"]))) {
            s.ratio <- rep(1, NROW(ohlc))
        } else {
            s.ratio <- adjRatios(split = ohlc[, "Split"])[, 1]
        }
        # Third column: the downloaded dividend divided by the split ratio.
        ohlc <- cbind(ohlc, ohlc[, "Adj.Div"] * (1/s.ratio))
        colnames(ohlc)[3] <- "Div"
        ohlc[, "Split"] <- as.numeric(ohlc[, "Split"])
        ohlc <- ohlc[, c("Div", "Split", "Adj.Div")]
    }

    # Trim to the requested window using an xts "from/to" range string.
    ohlc <- ohlc[paste(format(beg, "%Y-%m-%d"), format(end_lt, "%Y-%m-%d"), sep = "/"), ]
    return(ohlc)
}