Skip to content

Instantly share code, notes, and snippets.

@atraining
Created October 3, 2016 08:14
Show Gist options
  • Save atraining/9b76eaef576864de6f2a6b2cf59486b8 to your computer and use it in GitHub Desktop.
# TODO - Code Cleaning
#################################
#
# Packages
#
#################################
library(RCurl)
library(reshape)
library(xts)
library(xtable)
library(pander)
library("PerformanceAnalytics")
library(quantmod)
# Strong, high-contrast colors used for plot series.
strong <- c("black", "red", "blue3", "olivedrab3")
################################################################################
#
## Brew Download URL
#
################################################################################
#' Build a wikifolio CSV download URL.
#'
#' Assembles the query-string URL used by the wikifolio "download" endpoint,
#' either for daily prices or for the account statement (trade list).
#'
#' @param name_wikifolio Wikifolio identifier placed in the `name=` parameter.
#' @param start_date_str Start date as a 'dd.mm.YYYY' string (sent verbatim).
#' @param end_date_str End date as a 'dd.mm.YYYY' string (sent verbatim).
#' @param download_price If TRUE request daily prices (`type=daily`);
#'   otherwise the account statement (`type=account-statement`).
#' @return The complete download URL as a single character string.
brew_price_url <- function(name_wikifolio, start_date_str,
                           end_date_str, download_price = TRUE) {
  # Example account-statement URL:
  # https://www.wikifolio.com/dynamic/de/
  # de/invest/download?type=account-statement&name=KKDAXABW
  url_download_base <- "https://www.wikifolio.com/dynamic/de/de/invest/download?"
  if (download_price) {
    option_frequency <- "type=daily"
  } else {
    option_frequency <- "type=account-statement"
  }
  # Bug fix: the end date was previously sent as a second "dateFrom="
  # parameter, so the request carried no end date at all.
  url_download_query <- paste(paste0("name=", name_wikifolio),
                              option_frequency,
                              paste0("dateFrom=", start_date_str),
                              paste0("dateTo=", end_date_str),
                              sep = "&")
  paste0(url_download_base, url_download_query)
}
#
# brew_price_url('name_wikifolio', 'start_date_str',
# 'end_date_str', download_price = F)
#
################################################################################
#
## Download Prices
#
################################################################################
# Download daily price data for a wikifolio and return it as a list with
# $meta (dates, WKN, display name, cleaned header names) and
# $data ($df.prices data.frame, $xts time series restricted to the
# wikifolio's own start date .. requested end date).
# Dates are 'dd.mm.YYYY' strings; requires RCurl, xts and an internet
# connection (downloads a semicolon-separated CSV from wikifolio.com).
get_prices <- function(name_wikifolio,start_date_str,end_date_str){
# Build the daily-prices download URL
url_price_download = brew_price_url(name_wikifolio,start_date_str,
end_date_str,download_price = T)
# Result structure: prices$meta and prices$data are filled in below
prices = c()
prices$meta = c()
prices$data = c()
# Fetch the CSV as raw bytes (binary: the payload contains NUL bytes)
htmlRaw <- getURLContent(url_price_download,
followlocation = TRUE, binary = TRUE)
# make a copy for conversion
htmlAsc <- htmlRaw
# find where the NULs are (rawToChar() would fail on embedded NULs)
htmlNul <- htmlRaw == as.raw(0)
# Replace NULs with 0x14 (DC4) as a temporary placeholder.
# NOTE(review): the original comment said "SPACEs", but as.raw(20) is
# decimal 20 = 0x14, not 0x20; it still works because the placeholder
# is stripped again below.
htmlAsc[htmlNul] <- as.raw(20)
# Now the buffer is safe to convert to a character string
htmlPage <- rawToChar(htmlAsc)
# "\024" is octal for 0x14 — strip the placeholder characters again
htmlPage <- gsub("\024", "", htmlPage)
# The first 4 rows of the CSV are metadata (WKN, name, header, start date)
df.meta <- read.csv(con <- textConnection(htmlPage),
header=FALSE,
sep = ';',
nrows = 4)
close(con)
prices$meta$start_date_date = strptime(start_date_str, format = '%d.%m.%Y')
prices$meta$end_date_date = strptime(end_date_str, format = '%d.%m.%Y')
prices$meta$wkn_wikifolio = toString(df.meta[1,1])
prices$meta$human_name_wikifolio = toString(df.meta[2,1])
# Row 3 holds the column headers; flatten to a character vector
prices$meta$header.names = sapply(unname(unlist(df.meta[c(3),])), toString)
# Strip spaces and transliterate umlaut 'Ä' -> 'A' so headers become
# syntactically valid R names (e.g. "AnderungAnzahl")
prices$meta$header.names = gsub(" ", "", prices$meta$header.names)
prices$meta$header.names = gsub("\u00C4", "A", prices$meta$header.names)
# Row 4 holds the wikifolio's actual start timestamp
prices$meta$wiki_start_date_date = as.Date(droplevels(df.meta[4,1]),
format = '%d.%m.%Y %H:%M:%S')
# Re-read the same string, skipping the 5 metadata lines, as price rows
# (German number format: ';' separator, ',' decimal)
df.price_data <- read.csv(con <- textConnection(htmlPage),
header=FALSE,
sep = ';',
dec = ',',
colClasses=c('factor',
'factor',
'numeric',
'numeric',
'numeric',
'numeric'),
skip = 5)
close(con)
colnames(df.price_data) = prices$meta$header.names
# Drop column 2 (the end-date column of each interval)
df.price_data = df.price_data[,-c(2)]
df.price_data$Begindate = as.Date(df.price_data$Begindate,
"%d.%m.%Y %H:%M:%S")
prices$data$df.prices = df.price_data
#
## Parse to xts
#
# Convert the four numeric price columns to an xts indexed by Begindate
prices$data$xts <- xts(df.price_data[,2:5],df.price_data$Begindate)
# colnames(Wikifolio) = prices$meta$human_name_wikifolio
# Restrict to "wikifolio start date / requested end date" (xts range syntax).
# NOTE(review): "%Y-%M-%D" uses minute (%M) and shortcut (%D) codes, not
# month/day — harmless here only because the inputs are already Date
# objects, so as.Date() ignores the format; should read "%Y-%m-%d".
Wikifolio_period = paste(
as.Date(prices$meta$wiki_start_date_date,format = "%Y-%M-%D"),
as.Date(prices$meta$end_date_date,format = "%Y-%M-%D"),
sep = '/'
)
prices$data$xts <- prices$data$xts[Wikifolio_period]
# Prefix each column with the wikifolio name, e.g. "myfolio.Close"
colnames(prices$data$xts) = paste(
# gsub(" ", "-", prices$meta$human_name_wikifolio),
name_wikifolio,
colnames(prices$data$xts), sep = ".")
return(prices)
}
# TEST: Get Trade data
# prices = get_prices('alpha-return-europe-leverage','15.05.2015','01.01.2016')
################################################################################
#
## Get Trades
#
################################################################################
# Download the account statement (trade list) for a wikifolio and return a
# list with $meta (dates, WKN, name, headers, traded-ISIN universe,
# transaction types) and $data ($all raw trades joined with a local
# investment-universe CSV, $per_asset wide table by ISIN, $fees rows with
# no cash change in any ISIN column, $trades the rest).
# Requires RCurl, reshape, network access, and a hard-coded local CSV
# ('Investment_Universe.csv' under a Windows path) — not portable as-is.
get_trades <- function(name_wikifolio,start_date_str,end_date_str){
# Build the account-statement download URL
url_trade_download = brew_price_url(name_wikifolio,start_date_str,
end_date_str,download_price = F)
# Result structure: trades$meta and trades$data are filled in below
trades = c()
trades$meta = c()
trades$data = c()
# Fetch the CSV as raw bytes; it contains embedded NUL bytes
htmlRaw <- getURLContent(url_trade_download, followlocation = T, binary = T)
# NOTE(review): save_my_trades is assigned but never used afterwards
save_my_trades = htmlRaw
# make a copy for conversion
htmlAsc <- htmlRaw
# find where the NULs are (rawToChar() would fail on embedded NULs)
htmlNul <- htmlRaw == as.raw(0)
# Replace NULs with the 0x14 placeholder (as.raw(20) is DC4, not a space),
# stripped again after conversion below
htmlAsc[htmlNul] <- as.raw(20)
# Now the buffer is safe to convert to a character string
htmlPage <- rawToChar(htmlAsc)
htmlPage <- gsub("\024", "", htmlPage)
# The first 4 rows of the CSV are metadata (WKN, name, header, first trade)
df.meta <- read.csv(con <- textConnection(htmlPage),
header=FALSE,
sep = ';',
nrows = 4)
close(con)
trades$meta$start_date_date = strptime(start_date_str, format = '%d.%m.%Y')
trades$meta$end_date_date = strptime(end_date_str, format = '%d.%m.%Y')
trades$meta$wkn_wikifolio = toString(droplevels(df.meta[1,1]))
trades$meta$human_name_wikifolio = toString(droplevels(df.meta[2,1]))
# Row 3 holds the column headers; strip spaces and transliterate 'Ä' -> 'A'
# so they become valid R names (e.g. "AnderungAnzahl")
trades$meta$header.names = sapply(unname(unlist(df.meta[c(3),])), toString)
trades$meta$header.names = gsub(" ", "", trades$meta$header.names)
trades$meta$header.names = gsub('\u00C4', 'A', trades$meta$header.names)
trades$meta$wiki_first_trade_date = as.Date(droplevels(df.meta[4,1]),
format = '%d.%m.%Y %H:%M:%S')
# TODO: remove the thousands separator before parsing numerics
# (match on \\. — see the Stack Overflow conversion thread)
df.trade_data <- read.csv(con <- textConnection(htmlPage),
header=FALSE,
sep = ';',
dec = ',',
# colClasses=c('factor',
# 'factor',
# 'factor',
# 'numeric',
# 'numeric',
# 'numeric',
# 'numeric',
# 'numeric'),
skip = 5)
close(con)
# Format the downloaded csv
colnames(df.trade_data) = trades$meta$header.names
df.trade_data$Datum = as.Date(df.trade_data$Datum , "%d.%m.%Y %H:%M:%S")
# NOTE(review): if these columns were read as factors, as.numeric() yields
# factor level codes, not the printed values — verify against the raw CSV
df.trade_data$Cashnachher = as.numeric(df.trade_data$Cashnachher)
df.trade_data$AnderungAnzahl = as.numeric(df.trade_data$AnderungAnzahl)
df.trade_data$Anzahlnachher = as.numeric(df.trade_data$Anzahlnachher)
# Relative order size: cash change over cash after the trade
df.trade_data$order_size = df.trade_data$AnderungCash/df.trade_data$Cashnachher
trades$data$all = df.trade_data
# All ISINs ever traded in this wikifolio
trades$meta$wiki_universe = levels(trades$data$all$ISIN)
# Shorten transaction descriptions: "Wertpapier-Transaktion (Kauf)" -> "Kauf"
# (buy), "... (Verkauf)" -> "Verkauf" (sell)
levels(trades$data$all$Beschreibung) =
sub("Wertpapier-Transaktion\\s\\(Kauf\\)",
"Kauf", levels(trades$data$all$Beschreibung))
levels(trades$data$all$Beschreibung) =
sub("Wertpapier-Transaktion\\s\\(Verkauf\\)",
"Verkauf", levels(trades$data$all$Beschreibung))
trades$meta$wiki_transactions = levels(df.trade_data$Beschreibung)
#
## Clean Trade Data
#
# Attach asset categories from a locally maintained universe CSV.
# NOTE(review): hard-coded Windows path — breaks on any other machine.
#setwd('D:/gdrive/imalytix/04_Auftrag/03_Alpha Return Europe Leverage')
investment_universe =
file.path(paste('D:\\gdrive\\imalytix\\04_Auftrag\\',
'03_Alpha Return Europe Leverage\\',
'data\\Investment_Universe.csv', sep = ''))
df.universe = read.csv(investment_universe, sep = ";")
# Left outer join: all rows from the first table plus any row with
# matching keys from the right. Safe because the universe table has
# no duplicate ISINs.
trades$data$all = merge(trades$data$all,
df.universe, by = 'ISIN',
all.x = T)
# Pivot wide: one column per ISIN holding the mean cash change
trades$data$per_asset = cast(trades$data$all, Datum + WKN +
Bezeichnung + SecurityType +
Beschreibung + Anlageuniversum +
Preis + AnderungAnzahl +
Anzahlnachher + Cashnachher ~ ISIN
, mean, value = 'AnderungCash')
# Rows where every ISIN column is NaN are fee bookings, the rest trades.
# NOTE(review): the offsets differ (11 vs 12) — confirm this asymmetry is
# intentional, otherwise some rows may land in neither/both subsets.
trades$data$fees = trades$data$per_asset[
apply(trades$data$per_asset[11:ncol(trades$data$per_asset)],
1, function(X) all(is.nan(X))),]
trades$data$trades = trades$data$per_asset[
apply(trades$data$per_asset[12:ncol(trades$data$per_asset)],
1, function(X) !all(is.nan(X))),]
return(trades)
}
# TEST: Get Trade data
#trades = get_trades('alpha-return-europe-leverage','15.05.2015','01.01.2016')
#
# Write a data.frame as a CSV backup into the project's data directory,
# named "<end>_<start>_<wkn>_<name>_.csv".
# NOTE(review): end_date_date, start_date_date, wkn_wikifolio and
# human_name_wikifolio are free variables resolved from the calling/global
# environment — they are not defined anywhere in this file, so this
# function only works after those globals have been set (e.g. unpacked
# from a get_prices()/get_trades() result). Consider passing them as
# arguments.
save_df_as_csv <- function(df){
# Build the backup file name; sep = "_" also precedes ".csv",
# producing a trailing "_.csv"
filename = paste(format(end_date_date, "%Y%m%d"),
format(start_date_date, "%Y%m%d"),
wkn_wikifolio,
human_name_wikifolio,
".csv",
sep = "_")
# NOTE(review): hard-coded Windows path — breaks on any other machine
full_path = file.path(paste('D:\\gdrive\\imalytix\\04_Auftrag\\',
'03_Alpha Return Europe Leverage\\',
'data\\',
filename, sep = ''))
write.csv(df, file = full_path)
}
# #
# ## Day Return
# #
#
#
# basket_percentage = basket_percentage[-c(1),]
# zoo.basket_percentage <- as.zoo(basket_percentage)
# zoo.basket_percentage <-na.approx(zoo.basket_percentage)
#
#
# #
# ## Cumulative Return
# #
# zoo.data$ts <- as.zoo(data$ts)
# zoo.data$ts <-na.approx(zoo.data$ts)
#
# # Preprocessing Prices
#
# # First Day Price
# Wikifolio.Close <- Wikifolio[,1]
# GDAXI.Close <- GDAXI[,4]
# GSPC.Close <- GSPC[,4]
# STOXX50E.Close <- STOXX50E[,4]
#
#
# # Get first day closing price
# Wikifolio1 <- as.numeric(Wikifolio.Close[1])
# DAX1 <- as.numeric(GDAXI.Close[1])
# MDAXI1 <- as.numeric(GSPC.Close[1])
# SDAXI1 <- as.numeric(STOXX50E.Close[1])
#
#
# # Percentage
# Wikifolio <- (Wikifolio.Close/Wikifolio1)
# GDAXI <- (GDAXI.Close/DAX1)
# GSPC <- (GSPC.Close/MDAXI1)
# STOXX50E <- (STOXX50E.Close/SDAXI1)
#
#
# # Combine all Assets
# data$ts <- cbind(Wikifolio, GDAXI, GSPC, STOXX50E)
# zoo.data$ts <- as.zoo(data$ts)
# zoo.data$ts <-na.approx(zoo.data$ts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment