diff --git a/ch09-risk/data/download-symbol.sh b/ch09-risk/data/download-symbol.sh index 6d194102..0fb128dd 100755 --- a/ch09-risk/data/download-symbol.sh +++ b/ch09-risk/data/download-symbol.sh @@ -4,4 +4,4 @@ # # See LICENSE file for further information. -curl -o $2/$1.csv https://ichart.yahoo.com/table.csv?s=$1&a=0&b=1&c=2000&d=0&e=31&f=2013&g=d&ignore=.csv +wget -O ${2}/${1}.csv "http://www.google.com/finance/historical?q=${1}&startdate=Jan+01%2C+2000&enddate=Dec+31%2C+2013&output=csv" diff --git a/ch09-risk/src/main/scala/com/cloudera/datascience/risk/RunRisk.scala b/ch09-risk/src/main/scala/com/cloudera/datascience/risk/RunRisk.scala index eb688ef9..2e2c31b6 100644 --- a/ch09-risk/src/main/scala/com/cloudera/datascience/risk/RunRisk.scala +++ b/ch09-risk/src/main/scala/com/cloudera/datascience/risk/RunRisk.scala @@ -57,10 +57,10 @@ class RunRisk(private val spark: SparkSession) { import spark.implicits._ /** - * Reads a history in the Yahoo format + * Reads a history in the Google format */ - def readYahooHistory(file: File): Array[(LocalDate, Double)] = { - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + def readGoogleHistory(file: File): Array[(LocalDate, Double)] = { + val formatter = DateTimeFormatter.ofPattern("d-MMM-yy") val lines = scala.io.Source.fromFile(file).getLines().toSeq lines.tail.map { line => val cols = line.split(',') @@ -125,7 +125,7 @@ class RunRisk(private val spark: SparkSession) { val files = stocksDir.listFiles() val allStocks = files.iterator.flatMap { file => try { - Some(readYahooHistory(file)) + Some(readGoogleHistory(file)) } catch { case e: Exception => None } @@ -135,7 +135,7 @@ class RunRisk(private val spark: SparkSession) { val factorsPrefix = "factors/" val rawFactors = Array("^GSPC.csv", "^IXIC.csv", "^TYX.csv", "^FVX.csv"). map(x => new File(factorsPrefix + x)). - map(readYahooHistory) + map(readGoogleHistory) val stocks = rawStocks.map(trimToRegion(_, start, end)).map(fillInHistory(_, start, end))