import os
import numpy as np
import pandas as pd
import json

dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_mom = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Momentum'
dir_sect_port = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios'
#dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean\\'
#dir_mom = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Momentum'

Min_nr_of_values = 10   # minimum number of names required in the investable universe
winner_cut = 10         # top decile forms the long (winner) leg
loser_cut = 1           # bottom decile forms the short (loser) leg
MV_decile = 5           # delete all names in the lower 5 deciles of MV (smallest 50%)

os.chdir(dir_clean)

##### Import clean data sets #####
tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0)
tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0)
tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0)
tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0)

mv_nor = pd.read_csv('MV Norway clean.csv', index_col=0)
mv_swe = pd.read_csv('MV Sweden clean.csv', index_col=0)
mv_dk = pd.read_csv('MV Denmark clean.csv', index_col=0)
mv_fin = pd.read_csv('MV Finland clean.csv', index_col=0)

# Keep only the names that also have market-value data
tr_nor = tr_nor.loc[:, tr_nor.columns[tr_nor.columns.isin(mv_nor.columns)]]
tr_swe = tr_swe.loc[:, tr_swe.columns[tr_swe.columns.isin(mv_swe.columns)]]
tr_dk = tr_dk.loc[:, tr_dk.columns[tr_dk.columns.isin(mv_dk.columns)]]
tr_fin = tr_fin.loc[:, tr_fin.columns[tr_fin.columns.isin(mv_fin.columns)]]

base_period = [6, 12]
for base in base_period:

    ######## Calculate the 2-(base_period)M momentum return ########
    def MomReturnsSimple(prices):
        # Cumulative return from month t-(base+1) to t-1, skipping the most recent month
        mom_sim = prices.shift(1)/prices.shift(base + 1) - 1
        return mom_sim.iloc[(base - 1):, :]

    mom_nor_s = MomReturnsSimple(tr_nor.copy())
    mom_swe_s = MomReturnsSimple(tr_swe.copy())
    mom_dk_s = MomReturnsSimple(tr_dk.copy())
    mom_fin_s = MomReturnsSimple(tr_fin.copy())

    ###### Divide into MV deciles and drop the small-cap half ######
    def MV_quantiles(MV, MOM):
        for i in MV.index:
            rowquantiles = pd.qcut(MV.loc[i, :], 10, labels=np.arange(1, 11, 1))
            for j in range(len(MV.loc[i])):
                if rowquantiles[MV.columns[j]] <= MV_decile:
                    MOM.loc[i, MV.columns[j]] = np.nan
                else:
                    continue
        return MOM

    mom_nor = MV_quantiles(mv_nor, mom_nor_s.copy())
    mom_swe = MV_quantiles(mv_swe, mom_swe_s.copy())
    mom_dk = MV_quantiles(mv_dk, mom_dk_s.copy())
    mom_fin = MV_quantiles(mv_fin, mom_fin_s.copy())

    ######## Ranking Momentum ########
    def MomRank(mom):
        for i in range(len(mom)):
            row_i_ranked = mom.iloc[i, :].rank(ascending=False)
            mom.iloc[i, :] = row_i_ranked   # replace returns with the ranking
        return mom

    ranked_nor = MomRank(mom_nor.copy())
    ranked_swe = MomRank(mom_swe.copy())
    ranked_dk = MomRank(mom_dk.copy())
    ranked_fin = MomRank(mom_fin.copy())

    ######## Decile Ranking ########
    def DecileRank(mom_ranked):
        for i in range(len(mom_ranked)):
            if np.isnan(mom_ranked.iloc[i, :]).sum() == len(mom_ranked.columns):
                continue   # skip months where every name is missing
            else:
                row_i_decile_ranked = pd.qcut(mom_ranked.iloc[i, :], 10, labels=np.arange(10, 0, -1))
                mom_ranked.iloc[i, :] = row_i_decile_ranked   # replace the ranking with deciles
        return mom_ranked

    decile_nor = DecileRank(ranked_nor.copy())
    decile_swe = DecileRank(ranked_swe.copy())
    decile_dk = DecileRank(ranked_dk.copy())
    decile_fin = DecileRank(ranked_fin.copy())

    ######## Long/Short Portfolio Construction ########
    def PortConstr(Deciles):
        months = Deciles.index.tolist()
        long_port = dict([(key, []) for key in months])    # dictionaries to save the portfolio every month
        short_port = dict([(key, []) for key in months])
        for i in range(len(months)):
            # Get the highest and lowest decile stocks
            longs = Deciles.loc[months[i]][Deciles.loc[months[i]] >= winner_cut].index.tolist()
            shorts = Deciles.loc[months[i]][Deciles.loc[months[i]] <= loser_cut].index.tolist()
            long_port[months[i]] = longs    # save the stock names to the dictionaries
            short_port[months[i]] = shorts
        # Drop months with empty portfolios
        long_portfolios = {k: v for k, v in long_port.items() if v}
        short_portfolios = {k: v for k, v in short_port.items() if v}
        return long_portfolios, short_portfolios

    long_port_nor, short_port_nor = PortConstr(decile_nor)
    long_port_swe, short_port_swe = PortConstr(decile_swe)
    long_port_dk, short_port_dk = PortConstr(decile_dk)
    long_port_fin, short_port_fin = PortConstr(decile_fin)

    os.chdir(dir_mom)

    ######## Save the portfolios (dictionaries) as JSON ########
    dict_dict = {'long_port_nor': long_port_nor, 'short_port_nor': short_port_nor,
                 'long_port_swe': long_port_swe, 'short_port_swe': short_port_swe,
                 'long_port_dk': long_port_dk, 'short_port_dk': short_port_dk,
                 'long_port_fin': long_port_fin, 'short_port_fin': short_port_fin}
    for name, dict_ in dict_dict.items():
        with open('Mom 2-{}M {}.json'.format(base, name), 'w') as fp:
            json.dump(dict_, fp)

    ## Save a copy for the sector analysis - ask Jama
    os.chdir(dir_sect_port)
    for name, dict_ in dict_dict.items():
        with open('Mom 2-{}M {}.json'.format(base, name), 'w') as fp:
            json.dump(dict_, fp)
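# --- Illustration (toy data; a minimal sketch, not part of the pipeline) ---
# The decile convention used above: cross-sectional ranks are computed with
# ascending=False (rank 1 = best), and pd.qcut with labels 10..1 then puts the
# best performers in decile 10 (>= winner_cut) and the worst in decile 1
# (<= loser_cut). The returns below are made up.
import numpy as np
import pandas as pd

toy_mom = pd.Series([0.30, 0.12, 0.05, -0.02, -0.20, 0.08, 0.01, -0.10, 0.18, -0.05],
                    index=list('ABCDEFGHIJ'))
toy_rank = toy_mom.rank(ascending=False)                        # 1 = highest return
toy_decile = pd.qcut(toy_rank, 10, labels=np.arange(10, 0, -1))
assert toy_decile['A'] == 10 and toy_decile['E'] == 1           # A is a winner, E a loser
# ---------------------------------------------------------------------------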
import os
import numpy as np
import pandas as pd
import json

dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_bm = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/Book-to-Market'
dir_sect = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios'
#dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean\\'
#dir_bm = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Value\\Book-to-Market'

Min_nr_of_values = 10   # minimum number of names required in the investable universe
winner_cut = 10         # top decile forms the long (cheap) leg
loser_cut = 1           # bottom decile forms the short (expensive) leg
MV_decile = 5           # delete all names in the lower 5 deciles of MV (smallest 50%)

os.chdir(dir_clean)

########## Retrieve the (lagged) Book-to-Market values ##########
bm_nor = pd.read_csv('BtM Norway clean.csv', index_col=0)
bm_swe = pd.read_csv('BtM Sweden clean.csv', index_col=0)
bm_dk = pd.read_csv('BtM Denmark clean.csv', index_col=0)
bm_fin = pd.read_csv('BtM Finland clean.csv', index_col=0)

########## Retrieve MV to exclude the lower 50% ##########
mv_nor = pd.read_csv('MV Norway clean.csv', index_col=0)
mv_swe = pd.read_csv('MV Sweden clean.csv', index_col=0)
mv_dk = pd.read_csv('MV Denmark clean.csv', index_col=0)
mv_fin = pd.read_csv('MV Finland clean.csv', index_col=0)

######### Delete all names in the lower 5 MV deciles (smallest 50%) #########
def MV_quantiles(MV, BM):
    for i in MV.index:
        rowquantiles = pd.qcut(MV.loc[i, :], 10, labels=np.arange(1, 11, 1))
        for j in range(len(MV.loc[i])):
            if rowquantiles[MV.columns[j]] <= MV_decile:
                BM.loc[i, MV.columns[j]] = np.nan
            else:
                continue
    return BM

BM_nor = MV_quantiles(mv_nor, bm_nor.copy())
BM_swe = MV_quantiles(mv_swe, bm_swe.copy())
BM_dk = MV_quantiles(mv_dk, bm_dk.copy())
BM_fin = MV_quantiles(mv_fin, bm_fin.copy())

########################## Rank values ################################
def Rank(BM):
    for i in range(len(BM)):
        row_i_ranked = BM.iloc[i, :].rank(ascending=False)
        BM.iloc[i, :] = row_i_ranked   # replace values with the ranking
    return BM

ranked_nor = Rank(BM_nor.copy())
ranked_swe = Rank(BM_swe.copy())
ranked_dk = Rank(BM_dk.copy())
ranked_fin = Rank(BM_fin.copy())

####################### Create deciles based on the ranking ###############
def DecileRank(Ranked):
    for i in range(len(Ranked)):
        if Ranked.iloc[i, :].count() < Min_nr_of_values:   # too few names this month
            continue
        else:
            row_i_decile_ranked = pd.qcut(Ranked.iloc[i, :], 10, labels=np.arange(10, 0, -1))
            Ranked.iloc[i, :] = row_i_decile_ranked   # replace the ranking with deciles
    return Ranked

decile_nor = DecileRank(ranked_nor.copy())
decile_swe = DecileRank(ranked_swe.copy())
decile_dk = DecileRank(ranked_dk.copy())
decile_fin = DecileRank(ranked_fin.copy())

######## Long/Short Portfolio Construction ########
def PortConstr(Deciles):
    months = Deciles.index.tolist()
    long_port = dict([(key, []) for key in months])    # dictionaries to save the portfolio every month
    short_port = dict([(key, []) for key in months])
    for i in range(len(months)):
        # Get the highest and lowest decile stocks
        longs = Deciles.loc[months[i]][Deciles.loc[months[i]] >= winner_cut].index.tolist()
        shorts = Deciles.loc[months[i]][Deciles.loc[months[i]] <= loser_cut].index.tolist()
        long_port[months[i]] = longs    # save the stock names to the dictionaries
        short_port[months[i]] = shorts
    # Drop months with empty portfolios
    long_portfolios = {k: v for k, v in long_port.items() if v}
    short_portfolios = {k: v for k, v in short_port.items() if v}
    return long_portfolios, short_portfolios

long_port_nor, short_port_nor = PortConstr(decile_nor)
long_port_swe, short_port_swe = PortConstr(decile_swe)
long_port_dk, short_port_dk = PortConstr(decile_dk)
long_port_fin, short_port_fin = PortConstr(decile_fin)

os.chdir(dir_bm)

######## Save the portfolios (dictionaries) as JSON ########
dict_dict = {'long_port_nor': long_port_nor, 'short_port_nor': short_port_nor,
             'long_port_swe': long_port_swe, 'short_port_swe': short_port_swe,
             'long_port_dk': long_port_dk, 'short_port_dk': short_port_dk,
             'long_port_fin': long_port_fin, 'short_port_fin': short_port_fin}
for name, dict_ in dict_dict.items():
    with open('BM {}.json'.format(name), 'w') as fp:
        json.dump(dict_, fp)

## Save a copy for the sector analysis - ask Jama
os.chdir(dir_sect)
for name, dict_ in dict_dict.items():
    with open('BM {}.json'.format(name), 'w') as fp:
        json.dump(dict_, fp)
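# --- Optional refactor sketch (assumptions noted; not used by the scripts) --
# The cell-by-cell MV_quantiles loop above reappears in every script. Under
# the assumption that MV and the value/momentum frame share the same index
# and columns, the same small-cap screen can be written vectorised with a
# percentile rank per month; boundary handling differs marginally from
# pd.qcut's decile edges. MV_screen_vectorised and cutoff are names invented
# here for illustration.
import pandas as pd

def MV_screen_vectorised(MV, X, cutoff=0.5):
    small = MV.rank(axis=1, pct=True) <= cutoff   # True for the bottom half each month
    return X.mask(small)                          # blank out the small caps
# ---------------------------------------------------------------------------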
import os
import pandas as pd
import numpy as np
import json

dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_cf = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/CF-to-Market'
dir_sect = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios'
#dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean\\'
#dir_cf = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Value\\CF-to-Market'

Min_nr_of_values = 10   # minimum number of names required in the investable universe
winner_cut = 10         # top decile forms the long (cheap) leg
loser_cut = 1           # bottom decile forms the short (expensive) leg
MV_decile = 5           # delete all names in the lower 5 deciles of MV (smallest 50%)

os.chdir(dir_clean)

########## Retrieve the clean CF-to-Market and MV values ##########
fm_nor = pd.read_csv('CFtM Norway clean.csv', index_col=0)
fm_swe = pd.read_csv('CFtM Sweden clean.csv', index_col=0)
fm_dk = pd.read_csv('CFtM Denmark clean.csv', index_col=0)
fm_fin = pd.read_csv('CFtM Finland clean.csv', index_col=0)

mv_nor = pd.read_csv('MV Norway clean.csv', index_col=0)
mv_swe = pd.read_csv('MV Sweden clean.csv', index_col=0)
mv_dk = pd.read_csv('MV Denmark clean.csv', index_col=0)
mv_fin = pd.read_csv('MV Finland clean.csv', index_col=0)

######### Delete all names in the lower 5 MV deciles (smallest 50%) #########
def MV_quantiles(MV, CF_M):
    for i in MV.index:
        rowquantiles = pd.qcut(MV.loc[i, :], 10, labels=np.arange(1, 11, 1))
        for j in range(len(MV.loc[i])):
            if rowquantiles[MV.columns[j]] <= MV_decile:
                CF_M.loc[i, MV.columns[j]] = np.nan
            else:
                continue
    return CF_M

CF_MV_nor = MV_quantiles(mv_nor, fm_nor.copy())
CF_MV_swe = MV_quantiles(mv_swe, fm_swe.copy())
CF_MV_dk = MV_quantiles(mv_dk, fm_dk.copy())
CF_MV_fin = MV_quantiles(mv_fin, fm_fin.copy())

########################## Rank values ################################
def Rank(CF_MV):
    for i in range(len(CF_MV)):
        row_i_ranked = CF_MV.iloc[i, :].rank(ascending=False)
        CF_MV.iloc[i, :] = row_i_ranked   # replace values with the ranking
    return CF_MV

ranked_nor = Rank(CF_MV_nor.copy())
ranked_swe = Rank(CF_MV_swe.copy())
ranked_dk = Rank(CF_MV_dk.copy())
ranked_fin = Rank(CF_MV_fin.copy())

####################### Create deciles based on the ranking ###############
def DecileRank(Ranked):
    for i in range(len(Ranked)):
        if Ranked.iloc[i, :].count() < Min_nr_of_values:
            Ranked.iloc[i, :] = np.nan   # too few names: drop the whole month
            continue
        else:
            row_i_decile_ranked = pd.qcut(Ranked.iloc[i, :], 10, labels=np.arange(10, 0, -1))
            Ranked.iloc[i, :] = row_i_decile_ranked   # replace the ranking with deciles
    return Ranked

decile_nor = DecileRank(ranked_nor.copy())
decile_swe = DecileRank(ranked_swe.copy())
decile_dk = DecileRank(ranked_dk.copy())
decile_fin = DecileRank(ranked_fin.copy())

######## Long/Short Portfolio Construction ########
def PortConstr(Deciles):
    months = Deciles.index.tolist()
    long_port = dict([(key, []) for key in months])    # dictionaries to save the portfolio every month
    short_port = dict([(key, []) for key in months])
    for i in range(len(months)):
        # Get the highest and lowest decile stocks
        longs = Deciles.loc[months[i]][Deciles.loc[months[i]] >= winner_cut].index.tolist()
        shorts = Deciles.loc[months[i]][Deciles.loc[months[i]] <= loser_cut].index.tolist()
        long_port[months[i]] = longs    # save the stock names to the dictionaries
        short_port[months[i]] = shorts
    # Drop months with empty portfolios
    long_portfolios = {k: v for k, v in long_port.items() if v}
    short_portfolios = {k: v for k, v in short_port.items() if v}
    return long_portfolios, short_portfolios

long_port_nor, short_port_nor = PortConstr(decile_nor)
long_port_swe, short_port_swe = PortConstr(decile_swe)
long_port_dk, short_port_dk = PortConstr(decile_dk)
long_port_fin, short_port_fin = PortConstr(decile_fin)

os.chdir(dir_cf)

######## Save the portfolios (dictionaries) as JSON ########
dict_dict = {'long_port_nor': long_port_nor, 'short_port_nor': short_port_nor,
             'long_port_swe': long_port_swe, 'short_port_swe': short_port_swe,
             'long_port_dk': long_port_dk, 'short_port_dk': short_port_dk,
             'long_port_fin': long_port_fin, 'short_port_fin': short_port_fin}
for name, dict_ in dict_dict.items():
    with open('CF {}.json'.format(name), 'w') as fp:
        json.dump(dict_, fp)

## Save a copy for the sector analysis - ask Jama
os.chdir(dir_sect)
for name, dict_ in dict_dict.items():
    with open('CF {}.json'.format(name), 'w') as fp:
        json.dump(dict_, fp)
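# --- Usage sketch (assumes the dumps above have run; toy inspection only) ---
# Each saved portfolio is a plain {month: [tickers]} mapping, so it can be
# reloaded and inspected directly. The file name matches what the CF script
# just wrote into the current directory (dir_sect at this point).
import json

with open('CF long_port_nor.json', 'r') as fp:
    reloaded = json.load(fp)
first_month = sorted(reloaded)[0]
print(first_month, '->', len(reloaded[first_month]), 'names in the long leg')
# ---------------------------------------------------------------------------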
import os
import numpy as np
import pandas as pd
from plotly.offline import plot
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import json

dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_mom = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Momentum'
dir_plots = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plots'
dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes'
dir_returns = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns'
dir_stats = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Stats dataframes all'
'''
dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean'
dir_mom = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Momentum'
dir_plots = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plots'
dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes'
dir_returns = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns'
dir_stats = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Stats dataframes all'
'''

##### Import clean data sets #####
os.chdir(dir_clean)
me_nor = pd.read_csv('Price Norway ME clean.csv', index_col=0)
me_nor.index = pd.to_datetime(me_nor.index)
me_swe = pd.read_csv('Price Sweden ME clean.csv', index_col=0)
me_swe.index = pd.to_datetime(me_swe.index)
me_dk = pd.read_csv('Price Denmark ME clean.csv', index_col=0)
me_dk.index = pd.to_datetime(me_dk.index)
me_fin = pd.read_csv('Price Finland ME clean.csv', index_col=0)
me_fin.index = pd.to_datetime(me_fin.index)

### Risk-free rate: 10-year government bond yields (annual average) ###
rfr = pd.read_csv('Tbill clean.csv', index_col=0)
rfr.index = pd.to_datetime(rfr.index)

####### Parameters #######
form_period = [6, 12]
holding_period = [1, 3, 6]

###### Dataframe to save the stats in ######
position = ['Winners (P10)', 'Losers (P1)', 'Cash-Neutral (P10 - P1)']
market = ['Norway', 'Sweden', 'Denmark', 'Finland']
stat = ['Mean (R)', 'CAGR', 'Volatility (%)', 'Sharpe', 'Sortino', 'Max DD (%)']
# Random placeholder values; every cell is overwritten below
df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))),
                        index=stat, columns=holding_period)
df_stats = pd.DataFrame(pd.concat([df_stats]*len(form_period), axis=1, keys=form_period))
df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position), axis=1, keys=position))
stats = pd.DataFrame(pd.concat([df_stats1]*len(market), axis=0, keys=market))
stats.columns.names = ['Portfolio', 'Formation', 'Holding']

###### Transaction costs ######
trans_cost = 0.003   # one-way cost (0.3%)

def TCstep(returns):
    # Charge the cost in every rebalancing month; `hold` is taken from the loop below
    for posi in returns:
        for count in range(1, len(returns[posi].index), hold):
            month_ret = returns[posi].iloc[count]
            if month_ret > 0:
                returns[posi].iloc[count] = month_ret*(1 - trans_cost)
            elif month_ret < 0:
                returns[posi].iloc[count] = month_ret*(1 + trans_cost)
    return returns

####### Iterate through the different formation periods #######
for form in form_period:

    ######## Load the portfolios (dictionaries) from JSON ########
    os.chdir(dir_mom)
    dict_dict = {'long_port_nor', 'short_port_nor', 'long_port_swe', 'short_port_swe',
                 'long_port_dk', 'short_port_dk', 'long_port_fin', 'short_port_fin'}
    for name in dict_dict:
        with open('Mom 2-{}M {}.json'.format(form, name), 'r') as fp:
            locals()[name] = json.load(fp)   # at module scope locals() is globals()

    # Drop the last formation month: no return month follows it
    long_port_nor.pop('2019-12-31'); short_port_nor.pop('2019-12-31')
    long_port_swe.pop('2019-12-31'); short_port_swe.pop('2019-12-31')
    long_port_dk.pop('2019-12-31'); short_port_dk.pop('2019-12-31')
    long_port_fin.pop('2019-12-31'); short_port_fin.pop('2019-12-31')

    ####### Iterate through the different holding periods #######
    for hold in holding_period:

        # Carry each portfolio forward unchanged between rebalancing dates
        def HoldPeriod(Portfolio):
            count = 1
            for key, values in list(Portfolio.items()):
                if count == 1:
                    copy_port = Portfolio[key]
                    count += 1
                elif count < hold:
                    Portfolio[key] = copy_port
                    count += 1
                elif count == hold:
                    Portfolio[key] = copy_port
                    count = 1
            return Portfolio

        long_port_nor_c = HoldPeriod(long_port_nor.copy())
        long_port_swe_c = HoldPeriod(long_port_swe.copy())
        long_port_dk_c = HoldPeriod(long_port_dk.copy())
        long_port_fin_c = HoldPeriod(long_port_fin.copy())
        short_port_nor_c = HoldPeriod(short_port_nor.copy())
        short_port_swe_c = HoldPeriod(short_port_swe.copy())
        short_port_dk_c = HoldPeriod(short_port_dk.copy())
        short_port_fin_c = HoldPeriod(short_port_fin.copy())

        ### Calculate simple returns ###
        def PortfolioReturns(prices_me, long, short):
            # Calculate the returns of all stocks
            returns = prices_me/prices_me.shift(1) - 1
            # Create a dataframe to save the portfolio returns;
            # len(dates_invested) equals the length of the dictionaries
            dates_invested = list(long.keys())
            df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)),
                                      index=dates_invested, columns=position)
            df_returns.index.names = ['date']
            for i in range(len(dates_invested)):
                # Get the current date and the associated portfolios
                current_date = dates_invested[i]
                long_pos = long[current_date]
                short_pos = short[current_date]
                # Equal-weighted return over the following month
                long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos)
                short_ret = returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos)
                # Save to df_returns
                df_returns.loc[current_date, position[0]] = long_ret
                df_returns.loc[current_date, position[1]] = short_ret
                df_returns.loc[current_date, position[2]] = long_ret - short_ret
            return df_returns.shift(1)   # shift so each return is dated at the end of its month

        returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy())
        returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy())
        returns_dk = PortfolioReturns(me_dk.copy(), long_port_dk_c.copy(), short_port_dk_c.copy())
        returns_fin = PortfolioReturns(me_fin.copy(), long_port_fin_c.copy(), short_port_fin_c.copy())

        # Start all series at the same date so the markets are comparable
        def ReturnStart(returns):
            returns = returns.loc['1991-03-29':]
            returns.iloc[0] = np.nan
            return returns

        returns_nor = ReturnStart(returns_nor.copy())
        returns_swe = ReturnStart(returns_swe.copy())
        returns_dk = ReturnStart(returns_dk.copy())
        returns_fin = ReturnStart(returns_fin.copy())

        returns_nor = TCstep(returns_nor.copy())
        returns_swe = TCstep(returns_swe.copy())
        returns_dk = TCstep(returns_dk.copy())
        returns_fin = TCstep(returns_fin.copy())

        '''
        def ExcRet(returns, rfr):
            excess_returns = pd.DataFrame()
            excess_returns[position[0]] = returns[position[0]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
            excess_returns[position[1]] = returns[position[1]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
            excess_returns[position[2]] = returns[position[2]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
            return excess_returns

        excret_nor = ExcRet(returns_nor.copy(), rfr['Norway'])
        excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden'])
        excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark'])
        excret_fin = ExcRet(returns_fin.copy(), rfr['US'])
        '''

        returns_nor_100 = returns_nor * 100
        returns_swe_100 = returns_swe * 100
        returns_dk_100 = returns_dk * 100
        returns_fin_100 = returns_fin * 100

        ### Cumulative Returns ###
        def CumulativeReturns(simple_ret):
            cumret = (1 + simple_ret).cumprod()
            cumret.iloc[0] = 1
            return cumret

        cum_nor = CumulativeReturns(returns_nor.copy())
        cum_swe = CumulativeReturns(returns_swe.copy())
        cum_dk = CumulativeReturns(returns_dk.copy())
        cum_fin = CumulativeReturns(returns_fin.copy())

        ## Average annualised monthly return ##
        def AnnMeanMonthReturn(returns):
            return returns.mean()*12

        #### Compounded annual growth rate ####
        def CAGR(cumulative):
            return cumulative.iloc[-1]**(12/(len(cumulative) - 1)) - 1

        #### Annualised standard deviation of returns ####
        def AnnStd(returns):
            return returns.std()*np.sqrt(12)

        def Sharpe(returns):
            return np.sqrt(12) * returns.mean() / returns.std()

        # Sortino ratio: same as Sharpe, but only accounting for downside risk
        def DownsideRet(returns):
            return returns.loc[returns < 0]

        def Sortino(returns, down_ret):
            return np.sqrt(12) * returns.mean() / down_ret.std()

        #### Rolling maximum drawdown ####
        def MaxDD(cumret):
            Roll_Max = cumret.cummax()
            Monthly_Drawdown = cumret/Roll_Max - 1.0
            return Monthly_Drawdown

        for pos in position:
            # Annual mean return
            ann_return_nor = round(AnnMeanMonthReturn(returns_nor[pos])*100, 2)
            ann_return_swe = round(AnnMeanMonthReturn(returns_swe[pos])*100, 2)
            ann_return_dk = round(AnnMeanMonthReturn(returns_dk[pos])*100, 2)
            ann_return_fin = round(AnnMeanMonthReturn(returns_fin[pos])*100, 2)
            stats.loc[(market[0], stat[0]), (pos, form, hold)] = ann_return_nor
            stats.loc[(market[1], stat[0]), (pos, form, hold)] = ann_return_swe
            stats.loc[(market[2], stat[0]), (pos, form, hold)] = ann_return_dk
            stats.loc[(market[3], stat[0]), (pos, form, hold)] = ann_return_fin

            # Compound annual growth rate
            cagr_nor = round(CAGR(cum_nor[pos])*100, 2)
            cagr_swe = round(CAGR(cum_swe[pos])*100, 2)
            cagr_dk = round(CAGR(cum_dk[pos])*100, 2)
            cagr_fin = round(CAGR(cum_fin[pos])*100, 2)
            stats.loc[(market[0], stat[1]), (pos, form, hold)] = cagr_nor
            stats.loc[(market[1], stat[1]), (pos, form, hold)] = cagr_swe
            stats.loc[(market[2], stat[1]), (pos, form, hold)] = cagr_dk
            stats.loc[(market[3], stat[1]), (pos, form, hold)] = cagr_fin

            # Annualised volatility
            ann_std_nor = round(AnnStd(returns_nor[pos])*100, 2)
            ann_std_swe = round(AnnStd(returns_swe[pos])*100, 2)
            ann_std_dk = round(AnnStd(returns_dk[pos])*100, 2)
            ann_std_fin = round(AnnStd(returns_fin[pos])*100, 2)
            stats.loc[(market[0], stat[2]), (pos, form, hold)] = ann_std_nor
            stats.loc[(market[1], stat[2]), (pos, form, hold)] = ann_std_swe
            stats.loc[(market[2], stat[2]), (pos, form, hold)] = ann_std_dk
            stats.loc[(market[3], stat[2]), (pos, form, hold)] = ann_std_fin

            # Annualised Sharpe ratio
            sharpe_nor = round(Sharpe(returns_nor[pos]), 2)
            sharpe_swe = round(Sharpe(returns_swe[pos]), 2)
            sharpe_dk = round(Sharpe(returns_dk[pos]), 2)
            sharpe_fin = round(Sharpe(returns_fin[pos]), 2)
            stats.loc[(market[0], stat[3]), (pos, form, hold)] = sharpe_nor
            stats.loc[(market[1], stat[3]), (pos, form, hold)] = sharpe_swe
            stats.loc[(market[2], stat[3]), (pos, form, hold)] = sharpe_dk
            stats.loc[(market[3], stat[3]), (pos, form, hold)] = sharpe_fin

            # Downside risk and Sortino ratio
            down_ret_nor = DownsideRet(returns_nor[pos])
            down_ret_swe = DownsideRet(returns_swe[pos])
            down_ret_dk = DownsideRet(returns_dk[pos])
            down_ret_fin = DownsideRet(returns_fin[pos])
            sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor), 2)
            sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe), 2)
            sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk), 2)
            sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin), 2)
            stats.loc[(market[0], stat[4]), (pos, form, hold)] = sortino_nor
            stats.loc[(market[1], stat[4]), (pos, form, hold)] = sortino_swe
            stats.loc[(market[2], stat[4]), (pos, form, hold)] = sortino_dk
            stats.loc[(market[3], stat[4]), (pos, form, hold)] = sortino_fin

            # Maximum drawdown: only the minimum of the rolling drawdown is kept
            max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2)
            max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2)
            max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2)
            max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2)
            stats.loc[(market[0], stat[5]), (pos, form, hold)] = max_dd_nor
            stats.loc[(market[1], stat[5]), (pos, form, hold)] = max_dd_swe
            stats.loc[(market[2], stat[5]), (pos, form, hold)] = max_dd_dk
            stats.loc[(market[3], stat[5]), (pos, form, hold)] = max_dd_fin

            ######################### Plot cumulative returns #########################
            os.chdir(dir_plots)
            ## Create the dataframes to plot
            pos_x = pd.DataFrame({'Norway': cum_nor[pos], 'Sweden': cum_swe[pos],
                                  'Denmark': cum_dk[pos], 'Finland': cum_fin[pos]})
            pos_x.index.name = 'Date'
            pos_x_ret = pd.DataFrame({'Norway': returns_nor_100[pos], 'Sweden': returns_swe_100[pos],
                                      'Denmark': returns_dk_100[pos], 'Finland': returns_fin_100[pos]})
            pos_x_ret.index.name = 'Date'

            ## Build the figure and save it
            fig = make_subplots(rows=2, cols=1)
            fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Norway'], mode='lines',
                                     name='Norway ({}%)'.format(ann_return_nor), line_color="crimson"), row=1, col=1)
            fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Sweden'], mode='lines',
                                     name='Sweden ({}%)'.format(ann_return_swe), line_color="green"), row=1, col=1)
            fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Denmark'], mode='lines',
                                     name='Denmark ({}%)'.format(ann_return_dk), line_color="blue"), row=1, col=1)
            fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Finland'], mode='lines',
                                     name='Finland ({}%)'.format(ann_return_fin), line_color="goldenrod"), row=1, col=1)
            # Add the subplot for monthly returns
            fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Norway'], mode='lines',
                                     name='NOR ret (Max DD: {}%)'.format(max_dd_nor), line_color="crimson"), row=2, col=1)
            fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Sweden'], mode='lines',
                                     name='SWE ret (Max DD: {}%)'.format(max_dd_swe), line_color="green"), row=2, col=1)
            fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Denmark'], mode='lines',
                                     name='DK ret (Max DD: {}%)'.format(max_dd_dk), line_color="blue"), row=2, col=1)
            fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Finland'], mode='lines',
                                     name='FIN ret (Max DD: {}%)'.format(max_dd_fin), line_color="goldenrod"), row=2, col=1)
            fig.update_layout(legend_orientation="h")
            fig.update_layout(yaxis_type="log")
            fig.update_layout(title="F2-{}M, H{} {}-Returns".format(form, hold, pos),
                              xaxis_title="Date", yaxis_title="Cumulative Return (%)",
                              font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"))
            fig.update_yaxes(title_text="Monthly return (%)", row=2, col=1)
            #fig.update_xaxes(title_text="Date", row=2, col=1)
            plot(fig, auto_open=False, filename='F2-{}M, H{} {}-Returns.html'.format(form, hold, pos))

        ##### Save dataframes for the plot code #####
        if (form == 6 and hold == 1):
            os.chdir(dir_df)
            max_dd_rolling_nor = round(MaxDD(cum_nor.copy())*100, 2)
            max_dd_rolling_swe = round(MaxDD(cum_swe.copy())*100, 2)
            max_dd_rolling_dk = round(MaxDD(cum_dk.copy())*100, 2)
            max_dd_rolling_fin = round(MaxDD(cum_fin.copy())*100, 2)
            # Stats computed from net returns (Returns, Sharpe, Sortino, STD)
            df_list = [returns_nor, returns_swe, returns_dk, returns_fin]
            name_list = ['returns_nor_mom_F{}-H{}'.format(form, hold), 'returns_swe_mom_F{}-H{}'.format(form, hold),
                         'returns_dk_mom_F{}-H{}'.format(form, hold), 'returns_fin_mom_F{}-H{}'.format(form, hold)]
            for i in range(len(df_list)):
                df_list[i].to_pickle(name_list[i]+'.pkl')
            # Stats computed from raw returns (Cum, MDD)
            df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin,
                           max_dd_rolling_nor, max_dd_rolling_swe, max_dd_rolling_dk, max_dd_rolling_fin,
                           returns_nor, returns_swe, returns_dk, returns_fin]
            name_list_raw = ['cum_nor_mom_F{}-H{}'.format(form, hold), 'cum_swe_mom_F{}-H{}'.format(form, hold),
                             'cum_dk_mom_F{}-H{}'.format(form, hold), 'cum_fin_mom_F{}-H{}'.format(form, hold),
                             'max_dd_rolling_nor_mom_F{}-H{}'.format(form, hold), 'max_dd_rolling_swe_mom_F{}-H{}'.format(form, hold),
                             'max_dd_rolling_dk_mom_F{}-H{}'.format(form, hold), 'max_dd_rolling_fin_mom_F{}-H{}'.format(form, hold),
                             'returns_nor_mom_F{}-H{}'.format(form, hold), 'returns_swe_mom_F{}-H{}'.format(form, hold),
                             'returns_dk_mom_F{}-H{}'.format(form, hold), 'returns_fin_mom_F{}-H{}'.format(form, hold)]
            for i in range(len(df_list_raw)):
                df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl')

######## Write the statistics to Excel ########
os.chdir(dir_returns)
writer = pd.ExcelWriter('Statistics Momentum.xlsx', engine='xlsxwriter')
for pos in position:
    stats.loc[:][pos].to_excel(writer, sheet_name='{}'.format(pos))
    workbook = writer.book
    worksheet = writer.sheets[pos]
    # Rows where the HIGH value is marked green (Returns, Sharpe, Sortino and Max DD)
    color_range_high_green = ["C4:L4", "C6:L6", "C7:L7", "C8:L8", "C9:L9",
                              "C11:L11", "C12:L12", "C13:L13", "C14:L14",
                              "C16:L16", "C17:L17", "C18:L18", "C19:L19",
                              "C21:L21", "C22:L22", "C23:L23"]
    # Rows where the LOW value is marked green (Volatility)
    color_range_low_green = ["C5:L5", "C10:L10", "C15:L15", "C20:L20"]
    # Light red fill with dark red text
    formatred = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
    # Green fill with dark green text
    formatgreen = workbook.add_format({'bg_color': '#C6EFCE', 'font_color': '#006100'})
    # Highlight the top 2 values green and the bottom 2 red
    for i in color_range_high_green:
        worksheet.conditional_format(i, {'type': 'top', 'value': '2', 'format': formatgreen})
        worksheet.conditional_format(i, {'type': 'bottom', 'value': '2', 'format': formatred})
    # Highlight the bottom 2 values green and the top 2 red
    for j in color_range_low_green:
        worksheet.conditional_format(j, {'type': 'top', 'value': '2', 'format': formatred})
        worksheet.conditional_format(j, {'type': 'bottom', 'value': '2', 'format': formatgreen})
    #print(stats.loc[:][pos].to_latex(multicolumn=True, multirow=True))
writer.save()

#%% #### LATEX CODE ####
os.chdir(dir_stats)
stats.to_pickle('stats_mom.pkl')
stats_msci = pd.read_pickle('stats_msci.pkl')
stats_mom = stats
strategy = ['Panel A: Momentum', '']
latex_mom = pd.DataFrame(pd.concat([stats_mom, stats_msci], axis=1, keys=strategy))
#print(latex_mom.to_latex(multicolumn=True, multirow=True))
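# --- Sanity check (synthetic data; a minimal sketch, not part of the thesis) ---
# Verifies the annualisation conventions used above on a constant +1% monthly
# return: the mean annualises to 12% and, mirroring CumulativeReturns (first
# observation set to 1, so n observations span n-1 monthly steps), the CAGR
# equals 1.01**12 - 1.
import numpy as np
import pandas as pd

_r = pd.Series([np.nan] + [0.01]*120)   # first month has no return, as after ReturnStart
_c = (1 + _r).cumprod()
_c.iloc[0] = 1
assert abs(_r.mean()*12 - 0.12) < 1e-12
_cagr = _c.iloc[-1]**(12/(len(_c) - 1)) - 1
assert abs(_cagr - (1.01**12 - 1)) < 1e-9
# -----------------------------------------------------------------------------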
import os
import numpy as np
import pandas as pd
import json
from plotly.offline import plot
from plotly.subplots import make_subplots
import plotly.graph_objs as go

dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_bm = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/Book-to-Market'
dir_plots = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plots'
dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes'
dir_returns = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns'
dir_stats = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Stats dataframes all'
'''
dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean'
dir_bm = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Value\\Book-to-Market'
dir_plots = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plots'
dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes'
dir_returns = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns'
dir_stats = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Stats dataframes all'
'''

##### Import clean data sets #####
os.chdir(dir_clean)
me_nor = pd.read_csv('Price Norway ME clean.csv', index_col=0)
me_nor.index = pd.to_datetime(me_nor.index)
me_swe = pd.read_csv('Price Sweden ME clean.csv', index_col=0)
me_swe.index = pd.to_datetime(me_swe.index)
me_dk = pd.read_csv('Price Denmark ME clean.csv', index_col=0)
me_dk.index = pd.to_datetime(me_dk.index)
me_fin = pd.read_csv('Price Finland ME clean.csv', index_col=0)
me_fin.index = pd.to_datetime(me_fin.index)

### Risk-free rate: 10-year government bond yields (annual average) ###
rfr = pd.read_csv('Tbill clean.csv', index_col=0)
rfr.index = pd.to_datetime(rfr.index)

####### Parameters #######
holding_period = [6, 9, 12]

###### Dataframe to save the stats in ######
position = ['Cheap (P10)', 'Expensive (P1)', 'Cash-Neutral (P10 - P1)']
market = ['Norway', 'Sweden', 'Denmark', 'Finland']
stat = ['Mean (R)', 'CAGR', 'Volatility (%)', 'Sharpe', 'Sortino', 'Max DD (%)']
# Random placeholder values; every cell is overwritten below
df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))),
                        index=stat, columns=holding_period)
df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position), axis=1, keys=position))
stats = pd.DataFrame(pd.concat([df_stats1]*len(market), axis=0, keys=market))
stats.columns.names = ['Portfolio', 'Holding']

###### Transaction costs ######
trans_cost = 0.003   # one-way cost (0.3%)

def TCstep(returns):
    # Charge the cost in every rebalancing month; `hold` is taken from the loop below
    for posi in returns:
        for count in range(1, len(returns[posi].index), hold):
            month_ret = returns[posi].iloc[count]
            if month_ret > 0:
                returns[posi].iloc[count] = month_ret*(1 - trans_cost)
            elif month_ret < 0:
                returns[posi].iloc[count] = month_ret*(1 + trans_cost)
    return returns

######## Load the portfolios (dictionaries) from JSON ########
os.chdir(dir_bm)
dict_dict = {'long_port_nor', 'short_port_nor', 'long_port_swe', 'short_port_swe',
             'long_port_dk', 'short_port_dk', 'long_port_fin', 'short_port_fin'}
for name in dict_dict:
    with open('BM {}.json'.format(name), 'r') as fp:
        locals()[name] = json.load(fp)   # at module scope locals() is globals()

# Drop the last formation month: no return month follows it
long_port_nor.pop('2019-12-31'); short_port_nor.pop('2019-12-31')
long_port_swe.pop('2019-12-31'); short_port_swe.pop('2019-12-31')
long_port_dk.pop('2019-12-31'); short_port_dk.pop('2019-12-31')
long_port_fin.pop('2019-12-31'); short_port_fin.pop('2019-12-31')

####### Iterate through the different holding periods #######
for hold in holding_period:

    # Carry each portfolio forward unchanged between rebalancing dates
    def HoldPeriod(Portfolio):
        count = 1
        for key, values in list(Portfolio.items()):
            if count == 1:
                copy_port = Portfolio[key]
                count += 1
            elif count < hold:
                Portfolio[key] = copy_port
                count += 1
            elif count == hold:
                Portfolio[key] = copy_port
                count = 1
        return Portfolio

    long_port_nor_c = HoldPeriod(long_port_nor.copy())
    long_port_swe_c = HoldPeriod(long_port_swe.copy())
    long_port_dk_c = HoldPeriod(long_port_dk.copy())
    long_port_fin_c = HoldPeriod(long_port_fin.copy())
    short_port_nor_c = HoldPeriod(short_port_nor.copy())
    short_port_swe_c = HoldPeriod(short_port_swe.copy())
    short_port_dk_c = HoldPeriod(short_port_dk.copy())
    short_port_fin_c = HoldPeriod(short_port_fin.copy())

    ### Calculate simple returns ###
    def PortfolioReturns(prices_me, long, short):
        # Calculate the returns of all stocks
        returns = prices_me/prices_me.shift(1) - 1
        # Create a dataframe to save the portfolio returns;
        # len(dates_invested) equals the length of the dictionaries
        dates_invested = list(long.keys())
        df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)),
                                  index=dates_invested, columns=position)
        df_returns.index.names = ['date']
        for i in range(len(dates_invested)):
            # Get the current date and the associated portfolios
            current_date = dates_invested[i]
            long_pos = long[current_date]
            short_pos = short[current_date]
            # Equal-weighted return over the following month
            long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos)
            short_ret = returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos)
            # Save to df_returns
            df_returns.loc[current_date, position[0]] = long_ret
            df_returns.loc[current_date, position[1]] = short_ret
            df_returns.loc[current_date, position[2]] = long_ret - short_ret
        return df_returns.shift(1)   # shift so each return is dated at the end of its month

    returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy())
    returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy())
    returns_dk = PortfolioReturns(me_dk.copy(), long_port_dk_c.copy(), short_port_dk_c.copy())
    returns_fin = PortfolioReturns(me_fin.copy(), long_port_fin_c.copy(), short_port_fin_c.copy())

    # Start all series at the same date so the markets are comparable
    def ReturnStart(returns):
        returns = returns.loc['1991-03-29':]
        returns.iloc[0] = np.nan
        return returns

    returns_nor = ReturnStart(returns_nor.copy())
    returns_swe = ReturnStart(returns_swe.copy())
    returns_dk = ReturnStart(returns_dk.copy())
    returns_fin = ReturnStart(returns_fin.copy())

    returns_nor = TCstep(returns_nor.copy())
    returns_swe = TCstep(returns_swe.copy())
    returns_dk = TCstep(returns_dk.copy())
    returns_fin = TCstep(returns_fin.copy())

    '''
    def ExcRet(returns, rfr):
        excess_returns = pd.DataFrame()
        excess_returns[position[0]] = returns[position[0]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
        excess_returns[position[1]] = returns[position[1]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
        excess_returns[position[2]] = returns[position[2]] - rfr.loc[returns.index[0]:returns_nor.index[-1]]
        return excess_returns

    excret_nor = ExcRet(returns_nor.copy(), rfr['Norway'])
    excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden'])
    excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark'])
    excret_fin = ExcRet(returns_fin.copy(), rfr['US'])
    '''

    returns_nor_100 = returns_nor * 100
    returns_swe_100 = returns_swe * 100
    returns_dk_100 = returns_dk * 100
    returns_fin_100 = returns_fin * 100

    ### Cumulative Returns ###
    def CumulativeReturns(simple_ret):
        cumret = (1 + simple_ret).cumprod()
        cumret.iloc[0] = 1
        return cumret

    cum_nor = CumulativeReturns(returns_nor.copy())
    cum_swe = CumulativeReturns(returns_swe.copy())
    cum_dk = CumulativeReturns(returns_dk.copy())
    cum_fin = CumulativeReturns(returns_fin.copy())

    ## Average annualised monthly return ##
    def AnnMeanMonthReturn(returns):
        return returns.mean()*12

    #### Compounded annual growth rate ####
    def CAGR(cumulative):
        return cumulative.iloc[-1]**(12/(len(cumulative) - 1)) - 1

    #### Annualised standard deviation of returns ####
    def AnnStd(returns):
        return returns.std()*np.sqrt(12)

    def Sharpe(returns):
        return np.sqrt(12) * returns.mean() / returns.std()

    # Sortino ratio: same as Sharpe, but only accounting for downside risk
    def DownsideRet(returns):
        return returns.loc[returns < 0]

    def Sortino(returns, down_ret):
        return np.sqrt(12) * returns.mean() / down_ret.std()

    #### Rolling maximum drawdown ####
    def MaxDD(cumret):
        Roll_Max = cumret.cummax()
        Monthly_Drawdown = cumret/Roll_Max - 1.0
        return Monthly_Drawdown

    for pos in position:
        # Annual mean return
        ann_return_nor = round(AnnMeanMonthReturn(returns_nor[pos])*100, 2)
        ann_return_swe = round(AnnMeanMonthReturn(returns_swe[pos])*100, 2)
        ann_return_dk = round(AnnMeanMonthReturn(returns_dk[pos])*100, 2)
        ann_return_fin = round(AnnMeanMonthReturn(returns_fin[pos])*100, 2)
        stats.loc[(market[0], stat[0]), (pos, hold)] = ann_return_nor
        stats.loc[(market[1], stat[0]), (pos, hold)] = ann_return_swe
        stats.loc[(market[2], stat[0]), (pos, hold)] = ann_return_dk
        stats.loc[(market[3], stat[0]), (pos, hold)] = ann_return_fin

        # Compound annual growth rate
        cagr_nor = round(CAGR(cum_nor[pos])*100, 2)
        cagr_swe = round(CAGR(cum_swe[pos])*100, 2)
        cagr_dk = round(CAGR(cum_dk[pos])*100, 2)
        cagr_fin = round(CAGR(cum_fin[pos])*100, 2)
        stats.loc[(market[0], stat[1]), (pos, hold)] = cagr_nor
        stats.loc[(market[1], stat[1]), (pos, hold)] = cagr_swe
        stats.loc[(market[2], stat[1]), (pos, hold)] = cagr_dk
        stats.loc[(market[3], stat[1]), (pos, hold)] = cagr_fin

        # Annualised volatility
        ann_std_nor = round(AnnStd(returns_nor[pos])*100, 2)
        ann_std_swe = round(AnnStd(returns_swe[pos])*100, 2)
        ann_std_dk = round(AnnStd(returns_dk[pos])*100, 2)
        ann_std_fin = round(AnnStd(returns_fin[pos])*100, 2)
        stats.loc[(market[0], stat[2]), (pos, hold)] = ann_std_nor
        stats.loc[(market[1], stat[2]), (pos, hold)] = ann_std_swe
        stats.loc[(market[2], stat[2]), (pos, hold)] = ann_std_dk
        stats.loc[(market[3], stat[2]), (pos, hold)] = ann_std_fin

        # Annualised Sharpe ratio
        sharpe_nor = round(Sharpe(returns_nor[pos]), 2)
        sharpe_swe = round(Sharpe(returns_swe[pos]), 2)
        sharpe_dk = round(Sharpe(returns_dk[pos]), 2)
        sharpe_fin = round(Sharpe(returns_fin[pos]), 2)
        stats.loc[(market[0], stat[3]), (pos, hold)] = sharpe_nor
        stats.loc[(market[1], stat[3]), (pos, hold)] = sharpe_swe
        stats.loc[(market[2], stat[3]), (pos, hold)] = sharpe_dk
        stats.loc[(market[3], stat[3]), (pos, hold)] = sharpe_fin

        # Downside risk and Sortino ratio
        down_ret_nor = DownsideRet(returns_nor[pos])
        down_ret_swe = DownsideRet(returns_swe[pos])
        down_ret_dk = DownsideRet(returns_dk[pos])
        down_ret_fin = DownsideRet(returns_fin[pos])
        sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor), 2)
        sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe), 2)
        sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk), 2)
        sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin), 2)
        stats.loc[(market[0], stat[4]), (pos, hold)] = sortino_nor
        stats.loc[(market[1], stat[4]), (pos, hold)] = sortino_swe
        stats.loc[(market[2], stat[4]), (pos, hold)] = sortino_dk
        stats.loc[(market[3], stat[4]), (pos, hold)] = sortino_fin

        # Maximum drawdown: only the minimum of the rolling drawdown is kept
        max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2)
        max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2)
        max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2)
        max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2)
        stats.loc[(market[0], stat[5]), (pos, hold)] = max_dd_nor
        stats.loc[(market[1], stat[5]), (pos, hold)] = max_dd_swe
        stats.loc[(market[2], stat[5]), (pos, hold)] = max_dd_dk
        stats.loc[(market[3], stat[5]), (pos, hold)] = max_dd_fin

        #################### Plot cumulative returns ###################
        os.chdir(dir_plots)
        ## Create the dataframes to plot
        pos_x = pd.DataFrame({'Norway': cum_nor[pos], 'Sweden': cum_swe[pos],
                              'Denmark': cum_dk[pos], 'Finland': cum_fin[pos]})
        pos_x.index.name = 'Date'
        pos_x_ret = pd.DataFrame({'Norway': returns_nor_100[pos], 'Sweden': returns_swe_100[pos],
                                  'Denmark': returns_dk_100[pos], 'Finland': returns_fin_100[pos]})
        pos_x_ret.index.name = 'Date'

        ## Build the figure and save it
        fig = make_subplots(rows=2, cols=1)
        fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Norway'], mode='lines',
                                 name='Norway ({}%)'.format(ann_return_nor), line_color="crimson"), row=1, col=1)
        fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Sweden'], mode='lines',
                                 name='Sweden ({}%)'.format(ann_return_swe), line_color="green"), row=1, col=1)
        fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Denmark'], mode='lines',
                                 name='Denmark ({}%)'.format(ann_return_dk), line_color="blue"), row=1, col=1)
        fig.add_trace(go.Scatter(x=pos_x.index, y=pos_x['Finland'], mode='lines',
                                 name='Finland ({}%)'.format(ann_return_fin), line_color="goldenrod"), row=1, col=1)
        # Add the subplot for monthly returns
        fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Norway'], mode='lines',
                                 name='NOR ret (Max DD: {}%)'.format(max_dd_nor), line_color="crimson"), row=2, col=1)
        fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Sweden'], mode='lines',
                                 name='SWE ret (Max DD: {}%)'.format(max_dd_swe), line_color="green"), row=2, col=1)
        fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Denmark'], mode='lines',
                                 name='DK ret (Max DD: {}%)'.format(max_dd_dk), line_color="blue"), row=2, col=1)
        fig.add_trace(go.Scatter(x=pos_x_ret.index, y=pos_x_ret['Finland'], mode='lines',
                                 name='FIN ret (Max DD: {}%)'.format(max_dd_fin), line_color="goldenrod"), row=2, col=1)
        fig.update_layout(legend_orientation="h")
        fig.update_layout(yaxis_type="log")
        fig.update_layout(title="BM, H{} {}-Returns".format(hold, pos),
                          xaxis_title="Date", yaxis_title="Cumulative Return (%)",
                          font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"))
        fig.update_yaxes(title_text="Monthly return (%)", row=2, col=1)
        #fig.update_xaxes(title_text="Date", row=2, col=1)
        plot(fig, auto_open=False, filename='BM, H{} {}-Returns.html'.format(hold, pos))

    ##### Save dataframes for the plot code #####
    if (hold == 12):
        os.chdir(dir_df)
        max_dd_rolling_nor = round(MaxDD(cum_nor.copy())*100, 2)
        max_dd_rolling_swe = round(MaxDD(cum_swe.copy())*100, 2)
        max_dd_rolling_dk = round(MaxDD(cum_dk.copy())*100, 2)
        max_dd_rolling_fin = round(MaxDD(cum_fin.copy())*100, 2)
        # Stats computed from net returns (Returns, Sharpe, Sortino, STD)
        df_list = [returns_nor, returns_swe, returns_dk, returns_fin]
        name_list = ['returns_nor_bm_H{}'.format(hold), 'returns_swe_bm_H{}'.format(hold),
                     'returns_dk_bm_H{}'.format(hold), 'returns_fin_bm_H{}'.format(hold)]
        for i in range(len(df_list)):
            df_list[i].to_pickle(name_list[i]+'.pkl')
        # Stats computed from raw returns (Cum, MDD)
        df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin,
                       max_dd_rolling_nor, max_dd_rolling_swe, max_dd_rolling_dk, max_dd_rolling_fin,
                       returns_nor, returns_swe, returns_dk, returns_fin]
        name_list_raw = ['cum_nor_bm_H{}'.format(hold), 'cum_swe_bm_H{}'.format(hold),
                         'cum_dk_bm_H{}'.format(hold), 'cum_fin_bm_H{}'.format(hold),
                         'max_dd_rolling_nor_bm_H{}'.format(hold), 'max_dd_rolling_swe_bm_H{}'.format(hold),
                         'max_dd_rolling_dk_bm_H{}'.format(hold), 'max_dd_rolling_fin_bm_H{}'.format(hold),
                         'returns_nor_bm_H{}'.format(hold), 'returns_swe_bm_H{}'.format(hold),
                         'returns_dk_bm_H{}'.format(hold), 'returns_fin_bm_H{}'.format(hold)]
        for i in range(len(df_list_raw)):
            df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl')

######## Write the statistics to Excel ########
os.chdir(dir_returns)
writer = pd.ExcelWriter('Statistics BM Value.xlsx', engine='xlsxwriter')
for pos in position:
    stats.loc[:][pos].to_excel(writer, sheet_name='{}'.format(pos))
    workbook = writer.book
    worksheet = writer.sheets[pos]
    # Rows where the HIGH value is marked green (Returns, Sharpe, Sortino and Max DD)
    color_range_high_green = ["C2:G2", "C4:G4", "C5:G5", "C6:G6", "C7:G7",
                              "C9:G9", "C10:G10", "C11:G11", "C12:G12",
                              "C14:G14", "C15:G15", "C16:G16", "C17:G17",
                              "C19:G19", "C20:G20", "C21:G21"]
    # Rows where the LOW value is marked green (Volatility)
    color_range_low_green = ["C3:G3", "C8:G8", "C13:G13", "C18:G18"]
    # Light red fill with dark red text
    formatred = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
    # Green fill with dark green text
    formatgreen = workbook.add_format({'bg_color': '#C6EFCE', 'font_color': '#006100'})
    # Highlight the top value green and the bottom value red
    for i in color_range_high_green:
        worksheet.conditional_format(i, {'type': 'top', 'value': '1', 'format': formatgreen})
        worksheet.conditional_format(i, {'type': 'bottom', 'value': '1', 'format': formatred})
    # Highlight the bottom value green and the top value red
    for j in color_range_low_green:
        worksheet.conditional_format(j, {'type': 'top', 'value': '1', 'format': formatred})
        worksheet.conditional_format(j, {'type': 'bottom', 'value': '1', 'format': formatgreen})
    #print(stats.loc[:][pos].to_latex(multicolumn=True, multirow=True))
writer.save()

#%% #### LATEX CODE ####
os.chdir(dir_stats)
stats.to_pickle('stats_bm.pkl')
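# --- Illustration (toy data; a minimal sketch, not part of the thesis) -----
# MaxDD returns the rolling drawdown series; the single "Max DD (%)" figure
# reported in stats is its minimum. A cumulative index that climbs to 2.0 and
# falls back to 1.5 has a maximum drawdown of -25%.
import pandas as pd

_toy_cum = pd.Series([1.0, 1.5, 2.0, 1.5, 1.8])
_dd = _toy_cum/_toy_cum.cummax() - 1.0
assert abs(_dd.min() - (-0.25)) < 1e-12
# ---------------------------------------------------------------------------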
Dataanalyse\\Strategy\\Returns\\Stats dataframes all' ''' ### Import clean data sets ##### os.chdir(dir_clean) me_nor = pd.read_csv('Price Norway ME clean.csv', index_col=0); me_nor.index = pd.to_datetime(me_nor.index); me_swe = pd.read_csv('Price Sweden ME clean.csv', index_col=0); me_swe.index = pd.to_datetime(me_swe.index); me_dk = pd.read_csv('Price Denmark ME clean.csv', index_col=0); me_dk.index = pd.to_datetime(me_dk.index); me_fin = pd.read_csv('Price Finland ME clean.csv', index_col=0); me_fin.index = pd.to_datetime(me_fin.index); ### 10-year (Government bonds annual average) ### rfr = pd.read_csv('Tbill clean.csv', index_col=0); rfr.index = pd.to_datetime(rfr.index); ####### Parameters ###### holding_period = [6, 9, 12] ###### Save stats to ####### position = ['Cheap (P10)', 'Expensive (P1)', 'Cash-Neutral (P10 - P1)'] market = ['Norway', 'Sweden', 'Denmark', 'Finland'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))), index=stat,columns=holding_period) df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position),axis=1, keys=position)) stats = pd.DataFrame(pd.concat([df_stats1]*len(market),axis=0, keys=market)) stats.columns.names = ['Portfolio','Holding'] ###### Transaction costs ###### trans_cost = 0.003 # (0.3%) def TCstep(returns): for posi in returns: for count in range(1,len(returns[posi].index),hold): month_ret = returns[posi][count] if month_ret > 0: returns[posi][count] = month_ret*(1-(trans_cost)) elif month_ret < 0: returns[posi][count] = month_ret*(1+(trans_cost)) return returns ####### iterate through the different basis periods #### ######## Load portfolios (dictionaries) from JSON ######### os.chdir(dir_cf) dict_dict = {'long_port_nor', 'short_port_nor', 'long_port_swe', 'short_port_swe', 'long_port_dk', 'short_port_dk', 'long_port_fin', 'short_port_fin'} for name in dict_dict: with open('CF {}.json'.format(name), 'r') as fp: locals()[name] = json.load(fp) long_port_nor.pop('2019-12-31'); short_port_nor.pop('2019-12-31'); long_port_swe.pop('2019-12-31'); short_port_swe.pop('2019-12-31'); long_port_dk.pop('2019-12-31'); short_port_dk.pop('2019-12-31'); long_port_fin.pop('2019-12-31'); short_port_fin.pop('2019-12-31'); ####### iterate through the different holding periods #### for hold in holding_period: # Delete all the between the holding periods def HoldPeriod(Portfolio): count=1 for key, values in list(Portfolio.items()): if count == 1: copy_port = Portfolio[key] count += 1 elif count < hold: Portfolio[key] = copy_port count += 1 elif count == hold: Portfolio[key] = copy_port count =1 return Portfolio long_port_nor_c = HoldPeriod(long_port_nor.copy()) long_port_swe_c = HoldPeriod(long_port_swe.copy()) long_port_dk_c = HoldPeriod(long_port_dk.copy()) long_port_fin_c = HoldPeriod(long_port_fin.copy()) short_port_nor_c = HoldPeriod(short_port_nor.copy()) short_port_swe_c = HoldPeriod(short_port_swe.copy()) short_port_dk_c = HoldPeriod(short_port_dk.copy()) short_port_fin_c = HoldPeriod(short_port_fin.copy()) ### Calculate simple returns ### def PortfolioReturns(prices_me, long, short): # Calculate the returns of all stocks returns = prices_me/prices_me.shift(1) - 1 # Create a dataframe to save the portfolio returns dates_invested = list(long.keys()) df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)), index=dates_invested, columns=position) df_returns.index.names = ['date'] for i in range(len(dates_invested)): # Get current date and 
associated returns current_date = dates_invested[i] long_pos = long[current_date] short_pos = short[current_date] # Calculate the returns for the given month long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos) short_ret = returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos) # Save to df_returns df_returns.loc[current_date, position[0]] = long_ret df_returns.loc[current_date, position[1]] = short_ret df_returns.loc[current_date, position[2]] = long_ret - short_ret return df_returns.shift(1) returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy()) returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy()) returns_dk = PortfolioReturns(me_dk.copy(), long_port_dk_c.copy(), short_port_dk_c.copy()) returns_fin = PortfolioReturns(me_fin.copy(), long_port_fin_c.copy(), short_port_fin_c.copy()) returns_nor = TCstep(returns_nor.copy()) returns_swe = TCstep(returns_swe.copy()) returns_dk = TCstep(returns_dk.copy()) returns_fin = TCstep(returns_fin.copy()) ''' def ExcRet(returns, rfr): excess_returns = pd.DataFrame() excess_returns[position[0]] = returns[position[0]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[1]] = returns[position[1]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[2]] = returns[position[2]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] return excess_returns excret_nor = ExcRet(returns_nor.copy(), rfr['Norway']) excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden']) excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark']) excret_fin = ExcRet(returns_fin.copy(), rfr['US']) ''' returns_nor_100 = returns_nor * 100 returns_swe_100 = returns_swe * 100 returns_dk_100 = returns_dk * 100 returns_fin_100 = returns_fin * 100 ### Cumulative Returns ### def CumulativeReturns(simple_ret): cumret = (1 + simple_ret).cumprod() cumret.iloc[0] = 1 return cumret cum_nor = CumulativeReturns(returns_nor.copy()); cum_swe = CumulativeReturns(returns_swe.copy()); cum_dk = CumulativeReturns(returns_dk.copy()); cum_fin = CumulativeReturns(returns_fin.copy()); ## Calculate the average annual return ## def AnnMeanMonthReturn(returns): return returns.mean()*12 #### Calculate the componded annual growth rate #### def CAGR(cumulative): return (cumulative.iloc[-1])**(12/len(cumulative-1)) -1 #### Calculate the standard diviation of returns and save to stats #### def AnnStd(returns): return returns.std()*np.sqrt(12) def Sharpe(returns): return np.sqrt(12) * returns.mean() / returns.std() # Calculate Sortino ratio. 
Same as Sharpe, but only accounting for downside risk def DownsideRet(returns): return returns.loc[returns<0] def Sortino(returns, down_ret): return np.sqrt(12) * returns.mean() / down_ret.std() #### Calculate the maximum drawdown and save to stats #### def MaxDD(cumret): Roll_Max = cumret.cummax() Monthly_Drawdown = cumret/Roll_Max - 1.0 return Monthly_Drawdown for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return_nor = round((AnnMeanMonthReturn(returns_nor[pos])*100),2) ann_return_swe = round((AnnMeanMonthReturn(returns_swe[pos])*100),2) ann_return_dk = round((AnnMeanMonthReturn(returns_dk[pos])*100),2) ann_return_fin = round((AnnMeanMonthReturn(returns_fin[pos])*100),2) stats.loc[market[0],stat[0]][pos,hold] = ann_return_nor stats.loc[market[1],stat[0]][pos,hold] = ann_return_swe stats.loc[market[2],stat[0]][pos,hold] = ann_return_dk stats.loc[market[3],stat[0]][pos,hold] = ann_return_fin # Calculate compound annual growth rate and save it in current stats dataframe cagr_nor = round((CAGR(cum_nor[pos])*100),2) cagr_swe = round((CAGR(cum_swe[pos])*100),2) cagr_dk = round((CAGR(cum_dk[pos])*100),2) cagr_fin = round((CAGR(cum_fin[pos])*100),2) stats.loc[market[0],stat[1]][pos,hold] = cagr_nor stats.loc[market[1],stat[1]][pos,hold] = cagr_swe stats.loc[market[2],stat[1]][pos,hold] = cagr_dk stats.loc[market[3],stat[1]][pos,hold] = cagr_fin # Calculate Annual STD and save to current stats dataframe ann_std_nor = round(AnnStd(returns_nor[pos])*100,2) ann_std_swe = round(AnnStd(returns_swe[pos])*100,2) ann_std_dk = round(AnnStd(returns_dk[pos])*100,2) ann_std_fin = round(AnnStd(returns_fin[pos])*100,2) stats.loc[market[0], stat[2]][pos,hold] = ann_std_nor stats.loc[market[1], stat[2]][pos,hold] = ann_std_swe stats.loc[market[2], stat[2]][pos,hold] = ann_std_dk stats.loc[market[3], stat[2]][pos,hold] = ann_std_fin # Calculate annual Sharpe ratio and save to current stats dataframe sharpe_nor = round(Sharpe(returns_nor[pos]),2) sharpe_swe = round(Sharpe(returns_swe[pos]),2) sharpe_dk = round(Sharpe(returns_dk[pos]),2) sharpe_fin = round(Sharpe(returns_fin[pos]),2) stats.loc[market[0], stat[3]][pos,hold] = sharpe_nor stats.loc[market[1], stat[3]][pos,hold] = sharpe_swe stats.loc[market[2], stat[3]][pos,hold] = sharpe_dk stats.loc[market[3], stat[3]][pos,hold] = sharpe_fin # Calculate Downsiderisk and Sortino ratio and save to current stats dataframe down_ret_nor = DownsideRet(returns_nor[pos]) down_ret_swe = DownsideRet(returns_swe[pos]) down_ret_dk = DownsideRet(returns_dk[pos]) down_ret_fin = DownsideRet(returns_fin[pos]) sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor),2) sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe),2) sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk),2) sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin),2) stats.loc[market[0], stat[4]][pos,hold] = sortino_nor stats.loc[market[1], stat[4]][pos,hold] = sortino_swe stats.loc[market[2], stat[4]][pos,hold] = sortino_dk stats.loc[market[3], stat[4]][pos,hold] = sortino_fin # Calculate Maximum Drawdown and save to current stats dataframe max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2) #only extract min value (maxDD) max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2) max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2) max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2) stats.loc[market[0], stat[5]][pos,hold] = max_dd_nor stats.loc[market[1], stat[5]][pos,hold] = max_dd_swe stats.loc[market[2], stat[5]][pos,hold] = 
max_dd_dk stats.loc[market[3], stat[5]][pos,hold] = max_dd_fin #################### Plot cumulative returns ################### os.chdir(dir_plots) ## Create the dataframe to plot pos_x = pd.DataFrame({'Norway':cum_nor[pos], 'Sweden':cum_swe[pos], 'Denmark':cum_dk[pos], 'Finland':cum_fin[pos]}) pos_x.index.name = 'Date' pos_x_ret = pd.DataFrame({'Norway':returns_nor_100[pos], 'Sweden':returns_swe_100[pos], 'Denmark':returns_dk_100[pos], 'Finland':returns_fin_100[pos]}) pos_x_ret.index.name = 'Date' ## Plot the fig and save fig = make_subplots(rows=2, cols=1) fig.add_trace(go.Scatter(x= pos_x.index, y=pos_x['Norway'], mode='lines', name='Norway ({}%)'.format(ann_return_nor), line_color="crimson"),row=1, col=1) fig.add_trace(go.Scatter(x= pos_x.index, y=pos_x['Sweden'], mode='lines', name='Sweden ({}%)'.format(ann_return_swe), line_color= "green"),row=1, col=1) fig.add_trace(go.Scatter(x= pos_x.index, y=pos_x['Denmark'], mode='lines', name='Denmark ({}%)'.format(ann_return_dk), line_color="blue"),row=1, col=1) fig.add_trace(go.Scatter(x= pos_x.index, y=pos_x['Finland'],mode='lines', name='Finland ({}%)'.format(ann_return_fin), line_color="goldenrod"),row=1, col=1) # Add subplot for returns fig.add_trace(go.Scatter(x= pos_x_ret.index, y=pos_x_ret['Norway'], mode='lines', name='NOR ret (Max DD: {}%)'.format(max_dd_nor), line_color="crimson"),row=2, col=1) fig.add_trace(go.Scatter(x= pos_x_ret.index, y=pos_x_ret['Sweden'], mode='lines', name='SWE ret (Max DD: {}%)'.format(max_dd_swe), line_color= "green"),row=2, col=1) fig.add_trace(go.Scatter(x= pos_x_ret.index, y=pos_x_ret['Denmark'], mode='lines', name='DK ret (Max DD: {}%)'.format(max_dd_dk), line_color="blue"),row=2, col=1) fig.add_trace(go.Scatter(x= pos_x_ret.index, y=pos_x_ret['Finland'], mode='lines', name='FIN ret (Max DD: {}%)'.format(max_dd_fin), line_color="goldenrod"),row=2, col=1) fig.update_layout(legend_orientation="h") fig.update_layout(yaxis_type="log") fig.update_layout(title="CF, H{} {}-Returns".format(hold,pos), xaxis_title="Date", yaxis_title="Cumulative Return (%)", font=dict(family="Courier New, monospace", size=12, color="#7f7f7f")) fig.update_yaxes(title_text="Monthly return (%)", row=2, col=1) #fig.update_xaxes(title_text="Date", row=2, col=1) plot(fig, auto_open=False, filename='CF, H{} {}-Returns.html'.format(hold,pos)) ##### Save dataframes #### if (hold == 6): os.chdir(dir_df) max_dd_rolling_nor = round(MaxDD(cum_nor.copy())*100, 2) max_dd_rolling_swe = round(MaxDD(cum_swe.copy())*100, 2) max_dd_rolling_dk = round(MaxDD(cum_dk.copy())*100, 2) max_dd_rolling_fin = round(MaxDD(cum_fin.copy())*100, 2) #Save stats computed with returns (Returns, Sharpe, Sortino, STD) df_list = [returns_nor, returns_swe, returns_dk, returns_fin] name_list = ['returns_nor_cf_H{}'.format(hold), 'returns_swe_cf_H{}'.format(hold), 'returns_dk_cf_H{}'.format(hold), 'returns_fin_cf_H{}'.format(hold)] for i in range(len(df_list)): df_list[i].to_pickle(name_list[i]+'.pkl') #Save stats computed with raw returns (Cum, MDD) df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin, max_dd_rolling_nor, max_dd_rolling_swe, max_dd_rolling_dk, max_dd_rolling_fin, returns_nor, returns_swe, returns_dk, returns_fin] name_list_raw = ['cum_nor_cf_H{}'.format(hold), 'cum_swe_cf_H{}'.format(hold), 'cum_dk_cf_H{}'.format(hold), 'cum_fin_cf_H{}'.format(hold), 'max_dd_rolling_nor_cf_H{}'.format(hold), 'max_dd_rolling_swe_cf_H{}'.format(hold), 'max_dd_rolling_dk_cf_H{}'.format(hold), 'max_dd_rolling_fin_cf_H{}'.format(hold), 'returns_nor_cf_H{}'.format(hold),
'returns_swe_cf_H{}'.format(hold), 'returns_dk_cf_H{}'.format(hold), 'returns_fin_cf_H{}'.format(hold)] for i in range(len(df_list_raw)): df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl') ######## Write the stats tables to Excel ######### os.getcwd() os.chdir(dir_returns) writer = pd.ExcelWriter('Statistics CF Value.xlsx', engine='xlsxwriter') for pos in position: stats.loc[:][pos].to_excel(writer, sheet_name = '{}'.format(pos)) workbook = writer.book worksheet = writer.sheets[pos] #list of rows with HIGH value marked in green (Returns, Sharpe, Sortino and Max DD) color_range_high_green = ["C2:G2", "C4:G4", "C5:G5" ,"C6:G6", "C7:G7", "C9:G9", "C10:G10" ,"C11:G11", "C12:G12", "C14:G14", "C15:G15", "C16:G16", "C17:G17", "C19:G19", "C20:G20", "C21:G21"] #list of rows with LOW value marked in green (Volatility) color_range_low_green = ["C3:G3", "C8:G8", "C13:G13", "C18:G18"] # Add a format. Light red fill with dark red text. formatred = workbook.add_format({'bg_color': '#FFC7CE','font_color': '#9C0006'}) # Add a format. Green fill with dark green text. formatgreen = workbook.add_format({'bg_color': '#C6EFCE','font_color': '#006100'}) #Highlight top cell green and bottom cell red for i in color_range_high_green: worksheet.conditional_format(i, {'type': 'top','value': '1','format': formatgreen}) worksheet.conditional_format(i, {'type': 'bottom','value': '1','format': formatred}) #Highlight bottom cell green and top cell red for j in color_range_low_green: worksheet.conditional_format(j, {'type': 'top','value': '1','format': formatred}) worksheet.conditional_format(j, {'type': 'bottom','value': '1','format': formatgreen}) #print(stats.loc[:][pos].to_latex(multicolumn=True, multirow=True)) writer.save() #%% #### LATEX CODE #### os.chdir(dir_stats) stats.to_pickle('stats_cfm.pkl') stats_msci = pd.read_pickle('stats_msci.pkl') stats_msci.columns = stats_msci.columns.droplevel(1) stats_bm = pd.read_pickle('stats_bm.pkl') stats_cf = stats strategy = ['Panel B: Book-to-Market', 'CashFlow-to-Market', ''] #latex_value = pd.DataFrame(pd.concat([stats_bm, stats_cf, stats_msci],axis=1, keys= strategy)) #print(latex_value.to_latex(multicolumn=True, multirow=True)) import os import numpy as np import pandas as pd from plotly.offline import plot import plotly.graph_objs as go from plotly.subplots import make_subplots import json dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean' dir_plots = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plots' dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes' dir_returns = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns' dir_df_stats = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Stats dataframes all' dir_mom = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Momentum' dir_bm = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/Book-to-Market' dir_cf = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/CF-to-Market' ''' dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B.
Dataanalyse\\Data Clean' dir_mom = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Momentum' dir_plots = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plots' dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes' dir_returns = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns' dir_df_stats = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Stats dataframes all' ''' ### Import clean data sets ##### os.chdir(dir_clean) me_nor = pd.read_csv('Price Norway ME clean.csv', index_col=0); me_nor.index = pd.to_datetime(me_nor.index); me_swe = pd.read_csv('Price Sweden ME clean.csv', index_col=0); me_swe.index = pd.to_datetime(me_swe.index); me_dk = pd.read_csv('Price Denmark ME clean.csv', index_col=0); me_dk.index = pd.to_datetime(me_dk.index); me_fin = pd.read_csv('Price Finland ME clean.csv', index_col=0); me_fin.index = pd.to_datetime(me_fin.index); ### 10-year (Government bonds annual average) ### rfr = pd.read_csv('Tbill clean.csv', index_col=0); rfr.index = pd.to_datetime(rfr.index); #%% MOMENTUM ####### Parameters ###### form_period = [6] holding_period =[1] ###### Save stats to ####### position = ['Winners (P10)', 'Losers (P1)', 'Cash-Neutral (P10 - P1)'] market = ['Norway', 'Sweden', 'Denmark', 'Finland'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))), index=stat,columns=holding_period) df_stats = pd.DataFrame(pd.concat([df_stats]*len(form_period),axis=1, keys=form_period)) df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position),axis=1, keys=position)) stats = pd.DataFrame(pd.concat([df_stats1]*len(market),axis=0, keys=market)) stats.columns.names = ['Portfolio', 'Formation','Holding'] ###### Transaction costs ###### trans_cost = 0.003 # (0.3%) def TCstep(returns): for posi in returns: for count in range(1,len(returns[posi].index),hold): month_ret = returns[posi][count] if month_ret > 0: returns[posi][count] = month_ret*(1-(trans_cost)) elif month_ret < 0: returns[posi][count] = month_ret*(1+(trans_cost)) return returns ####### iterate through the different formation periods #### for form in form_period: ######## Load portfolios (dictionaries) from JSON ######### os.chdir(dir_mom) dict_dict = {'lp_nor', 'sp_nor', 'lp_swe', 'sp_swe','lp_dk', 'sp_dk','lp_fin', 'sp_fin'} for name in dict_dict: with open('Sect Mom {}.json'.format(name), 'r') as fp: locals()[name] = json.load(fp) lp_nor.pop('2019-12-31'); sp_nor.pop('2019-12-31'); lp_swe.pop('2019-12-31'); sp_swe.pop('2019-12-31'); lp_dk.pop('2019-12-31'); sp_dk.pop('2019-12-31'); lp_fin.pop('2019-12-31'); sp_fin.pop('2019-12-31'); ####### iterate through the different holding periods #### for hold in holding_period: # Delete all the between the holding periods def HoldPeriod(Portfolio): count=1 for key, values in list(Portfolio.items()): if count == 1: copy_port = Portfolio[key] count += 1 elif count < hold: Portfolio[key] = copy_port count += 1 elif count == hold: Portfolio[key] = copy_port count =1 return Portfolio long_port_nor_c = HoldPeriod(lp_nor.copy()) long_port_swe_c = HoldPeriod(lp_swe.copy()) long_port_dk_c = HoldPeriod(lp_dk.copy()) long_port_fin_c = HoldPeriod(lp_fin.copy()) short_port_nor_c = HoldPeriod(sp_nor.copy()) short_port_swe_c = HoldPeriod(sp_swe.copy()) short_port_dk_c = 
HoldPeriod(sp_dk.copy()) short_port_fin_c = HoldPeriod(sp_fin.copy()) ### Calculate simple returns ### def PortfolioReturns(prices_me, long, short): # Calculate the returns of all stocks returns = prices_me/prices_me.shift(1) - 1 # Create a dataframe to save the portfolio returns dates_invested = list(long.keys()) df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)), index=dates_invested, columns=position) df_returns.index.names = ['date'] for i in range(len(dates_invested)): # Get current date and associated returns current_date = dates_invested[i] long_pos = long[current_date] short_pos = short[current_date] # Calculate the returns for the given month long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos) short_ret = returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos) # Save to df_returns df_returns.loc[current_date, position[0]] = long_ret df_returns.loc[current_date, position[1]] = short_ret df_returns.loc[current_date, position[2]] = long_ret - short_ret return df_returns.shift(1) # to correct dates for ret (2019.07.31 --> 2019.10.31) # remember that len(invested) = length of dictionaries returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy()) returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy()) returns_dk = PortfolioReturns(me_dk.copy(),long_port_dk_c.copy(), short_port_dk_c.copy()) returns_fin = PortfolioReturns(me_fin.copy(),long_port_fin_c.copy(), short_port_fin_c.copy()) def ReturnStart(returns): returns = returns.loc['1991-03-29':] returns.iloc[0] = np.nan return returns returns_nor = ReturnStart(returns_nor.copy()) returns_swe = ReturnStart(returns_swe.copy()) returns_dk = ReturnStart(returns_dk.copy()) returns_fin = ReturnStart(returns_fin.copy()) returns_nor = TCstep(returns_nor.copy()) returns_swe = TCstep(returns_swe.copy()) returns_dk = TCstep(returns_dk.copy()) returns_fin = TCstep(returns_fin.copy()) ''' def ExcRet(returns, rfr): excess_returns = pd.DataFrame() excess_returns[position[0]] = returns[position[0]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[1]] = returns[position[1]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[2]] = returns[position[2]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] return excess_returns excret_nor = ExcRet(returns_nor.copy(), rfr['Norway']) excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden']) excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark']) excret_fin = ExcRet(returns_fin.copy(), rfr['US']) ''' returns_nor_100 = returns_nor * 100 returns_swe_100 = returns_swe * 100 returns_dk_100 = returns_dk * 100 returns_fin_100 = returns_fin * 100 ### Cumulative Returns ### def CumulativeReturns(simple_ret): cumret = (1 + simple_ret).cumprod() cumret.iloc[0] = 1 return cumret cum_nor = CumulativeReturns(returns_nor.copy()); cum_swe = CumulativeReturns(returns_swe.copy()); cum_dk = CumulativeReturns(returns_dk.copy()); cum_fin = CumulativeReturns(returns_fin.copy()); ## Calculate the average annual return ## def AnnMeanMonthReturn(returns): return returns.mean()*12 #### Calculate the componded annual growth rate #### def CAGR(cumulative): return (cumulative.iloc[-1])**(12/len(cumulative-1)) -1 #### Calculate the standard diviation of returns and save to stats #### def AnnStd(returns): return returns.std()*np.sqrt(12) def Sharpe(returns): return np.sqrt(12) * returns.mean() / returns.std() # Calculate Sortino ratio. 
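#### Toy sketch of the TCstep adjustment applied above (hypothetical numbers, not thesis data) ####
# With hold = 1, every index from 1 onward is a rebalancing month, and the 0.3% one-way
# cost trims positive months and deepens negative ones, mirroring the loop in TCstep.
toy_ret = [0.02, 0.05, -0.03]
for k in range(1, len(toy_ret), 1):                       # step = hold
    toy_ret[k] *= (1 - 0.003) if toy_ret[k] > 0 else (1 + 0.003)
print(toy_ret)                                            # [0.02, 0.04985, -0.03009]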
Same as Sharpe, but only accounting for downside risk def DownsideRet(returns): return returns.loc[returns<0] def Sortino(returns, down_ret): return np.sqrt(12) * returns.mean() / down_ret.std() #### Calculate the maximum drawdown and save to stats #### def MaxDD(cumret): Roll_Max = cumret.cummax() Monthly_Drawdown = cumret/Roll_Max - 1.0 return Monthly_Drawdown for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return_nor = round((AnnMeanMonthReturn(returns_nor[pos])*100),2) ann_return_swe = round((AnnMeanMonthReturn(returns_swe[pos])*100),2) ann_return_dk = round((AnnMeanMonthReturn(returns_dk[pos])*100),2) ann_return_fin = round((AnnMeanMonthReturn(returns_fin[pos])*100),2) stats.loc[market[0],stat[0]][pos,form,hold] = ann_return_nor stats.loc[market[1],stat[0]][pos,form,hold] = ann_return_swe stats.loc[market[2],stat[0]][pos,form,hold] = ann_return_dk stats.loc[market[3],stat[0]][pos,form,hold] = ann_return_fin # Calculate compound annual growth rate and save it in current stats dataframe cagr_nor = round((CAGR(cum_nor[pos])*100),2) cagr_swe = round((CAGR(cum_swe[pos])*100),2) cagr_dk = round((CAGR(cum_dk[pos])*100),2) cagr_fin = round((CAGR(cum_fin[pos])*100),2) stats.loc[market[0],stat[1]][pos,form,hold] = cagr_nor stats.loc[market[1],stat[1]][pos,form,hold] = cagr_swe stats.loc[market[2],stat[1]][pos,form,hold] = cagr_dk stats.loc[market[3],stat[1]][pos,form,hold] = cagr_fin # Calculate Annual STD and save to current stats dataframe ann_std_nor = round(AnnStd(returns_nor[pos])*100,2) ann_std_swe = round(AnnStd(returns_swe[pos])*100,2) ann_std_dk = round(AnnStd(returns_dk[pos])*100,2) ann_std_fin = round(AnnStd(returns_fin[pos])*100,2) stats.loc[market[0], stat[2]][pos,form,hold] = ann_std_nor stats.loc[market[1], stat[2]][pos,form,hold] = ann_std_swe stats.loc[market[2], stat[2]][pos,form,hold] = ann_std_dk stats.loc[market[3], stat[2]][pos,form,hold] = ann_std_fin # Calculate annual Sharpe ratio and save to current stats dataframe sharpe_nor = round(Sharpe(returns_nor[pos]),2) sharpe_swe = round(Sharpe(returns_swe[pos]),2) sharpe_dk = round(Sharpe(returns_dk[pos]),2) sharpe_fin = round(Sharpe(returns_fin[pos]),2) stats.loc[market[0], stat[3]][pos,form,hold] = sharpe_nor stats.loc[market[1], stat[3]][pos,form,hold] = sharpe_swe stats.loc[market[2], stat[3]][pos,form,hold] = sharpe_dk stats.loc[market[3], stat[3]][pos,form,hold] = sharpe_fin # Calculate Downsiderisk and Sortino ratio and save to current stats dataframe down_ret_nor = DownsideRet(returns_nor[pos]) down_ret_swe = DownsideRet(returns_swe[pos]) down_ret_dk = DownsideRet(returns_dk[pos]) down_ret_fin = DownsideRet(returns_fin[pos]) sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor),2) sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe),2) sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk),2) sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin),2) stats.loc[market[0], stat[4]][pos,form,hold] = sortino_nor stats.loc[market[1], stat[4]][pos,form,hold] = sortino_swe stats.loc[market[2], stat[4]][pos,form,hold] = sortino_dk stats.loc[market[3], stat[4]][pos,form,hold] = sortino_fin # Calculate Maximum Drawdown and save to current stats dataframe max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2) #only extract min value (maxDD) max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2) max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2) max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2) stats.loc[market[0], stat[5]][pos,form,hold] = 
max_dd_nor stats.loc[market[1], stat[5]][pos,form,hold] = max_dd_swe stats.loc[market[2], stat[5]][pos,form,hold] = max_dd_dk stats.loc[market[3], stat[5]][pos,form,hold] = max_dd_fin stats_mom = stats.copy() os.chdir(dir_df) #Save stats computed with raw returns (Cum, MDD) df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin] name_list_raw = ['cum_nor_sect_mom_F{}-H{}'.format(form, hold), 'cum_swe_sect_mom_F{}-H{}'.format(form, hold), 'cum_dk_sect_mom_F{}-H{}'.format(form, hold), 'cum_fin_sect_mom_F{}-H{}'.format(form, hold)] for i in range(len(df_list_raw)): df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl') #%% Book-to-Market holding_period = [12] ###### Save stats to ####### position = ['Cheap (P10)', 'Expensive (P1)', 'Cash-Neutral (P10 - P1)'] market = ['Norway', 'Sweden', 'Denmark', 'Finland'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))), index=stat,columns=holding_period) df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position),axis=1, keys=position)) stats = pd.DataFrame(pd.concat([df_stats1]*len(market),axis=0, keys=market)) stats.columns.names = ['Portfolio','Holding'] ###### Transaction costs ###### trans_cost = 0.003 # (0.3%) def TCstep(returns): for posi in returns: for count in range(1,len(returns[posi].index),hold): month_ret = returns[posi][count] if month_ret > 0: returns[posi][count] = month_ret*(1-(trans_cost)) elif month_ret < 0: returns[posi][count] = month_ret*(1+(trans_cost)) return returns ####### iterate through the different basis periods #### ######## Load portfolios (dictionaries) from JSON ######### os.chdir(dir_bm) dict_dict = {'lp_nor', 'sp_nor', 'lp_swe', 'sp_swe','lp_dk', 'sp_dk','lp_fin', 'sp_fin'} for name in dict_dict: with open('Sect BM {}.json'.format(name), 'r') as fp: locals()[name] = json.load(fp) lp_nor.pop('2019-12-31'); sp_nor.pop('2019-12-31'); lp_swe.pop('2019-12-31'); sp_swe.pop('2019-12-31'); lp_dk.pop('2019-12-31'); sp_dk.pop('2019-12-31'); lp_fin.pop('2019-12-31'); sp_fin.pop('2019-12-31'); ####### iterate through the different holding periods #### for hold in holding_period: # Delete all the between the holding periods def HoldPeriod(Portfolio): count=1 for key, values in list(Portfolio.items()): if count == 1: copy_port = Portfolio[key] count += 1 elif count < hold: Portfolio[key] = copy_port count += 1 elif count == hold: Portfolio[key] = copy_port count =1 return Portfolio long_port_nor_c = HoldPeriod(lp_nor.copy()) long_port_swe_c = HoldPeriod(lp_swe.copy()) long_port_dk_c = HoldPeriod(lp_dk.copy()) long_port_fin_c = HoldPeriod(lp_fin.copy()) short_port_nor_c = HoldPeriod(sp_nor.copy()) short_port_swe_c = HoldPeriod(sp_swe.copy()) short_port_dk_c = HoldPeriod(sp_dk.copy()) short_port_fin_c = HoldPeriod(sp_fin.copy()) ### Calculate simple returns ### def PortfolioReturns(prices_me, long, short): # Calculate the returns of all stocks returns = prices_me/prices_me.shift(1) - 1 # Create a dataframe to save the portfolio returns dates_invested = list(long.keys()) df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)), index=dates_invested, columns=position) df_returns.index.names = ['date'] for i in range(len(dates_invested)): # Get current date and associated returns current_date = dates_invested[i] long_pos = long[current_date] short_pos = short[current_date] # Calculate the returns for the given month long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos) short_ret = 
returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos) # Save to df_returns df_returns.loc[current_date, position[0]] = long_ret df_returns.loc[current_date, position[1]] = short_ret df_returns.loc[current_date, position[2]] = long_ret - short_ret return df_returns.shift(1) # to correct dates for ret (2019.07.31 --> 2019.10.31) # remember that len(invested) = length of dictionaries returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy()) returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy()) returns_dk = PortfolioReturns(me_dk.copy(), long_port_dk_c.copy(), short_port_dk_c.copy()) returns_fin = PortfolioReturns(me_fin.copy(), long_port_fin_c.copy(), short_port_fin_c.copy()) def ReturnStart(returns): returns = returns.loc['1991-03-29':] returns.iloc[0] = np.nan return returns returns_nor = ReturnStart(returns_nor.copy()) returns_swe = ReturnStart(returns_swe.copy()) returns_dk = ReturnStart(returns_dk.copy()) returns_fin = ReturnStart(returns_fin.copy()) returns_nor = TCstep(returns_nor.copy()) returns_swe = TCstep(returns_swe.copy()) returns_dk = TCstep(returns_dk.copy()) returns_fin = TCstep(returns_fin.copy()) ''' def ExcRet(returns, rfr): excess_returns = pd.DataFrame() excess_returns[position[0]] = returns[position[0]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[1]] = returns[position[1]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[2]] = returns[position[2]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] return excess_returns excret_nor = ExcRet(returns_nor.copy(), rfr['Norway']) excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden']) excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark']) excret_fin = ExcRet(returns_fin.copy(), rfr['US']) ''' returns_nor_100 = returns_nor * 100 returns_swe_100 = returns_swe * 100 returns_dk_100 = returns_dk * 100 returns_fin_100 = returns_fin * 100 ### Cumulative Returns ### def CumulativeReturns(simple_ret): cumret = (1 + simple_ret).cumprod() cumret.iloc[0] = 1 return cumret cum_nor = CumulativeReturns(returns_nor.copy()); cum_swe = CumulativeReturns(returns_swe.copy()); cum_dk = CumulativeReturns(returns_dk.copy()); cum_fin = CumulativeReturns(returns_fin.copy()); ## Calculate the average annual return ## def AnnMeanMonthReturn(returns): return returns.mean()*12 #### Calculate the componded annual growth rate #### def CAGR(cumulative): return (cumulative.iloc[-1])**(12/len(cumulative-1)) -1 #### Calculate the standard diviation of returns and save to stats #### def AnnStd(returns): return returns.std()*np.sqrt(12) def Sharpe(returns): return np.sqrt(12) * returns.mean() / returns.std() # Calculate Sortino ratio. 
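#### Toy check of the date alignment in PortfolioReturns above (hypothetical prices) ####
# returns.shift(-1).loc[t] is the return realized over the month AFTER formation date t,
# and the closing df_returns.shift(1) stamps it back onto the month it was earned.
toy_px = pd.Series([100.0, 110.0, 99.0],
                   index=pd.to_datetime(['2019-01-31', '2019-02-28', '2019-03-31']))
toy_ret = toy_px / toy_px.shift(1) - 1
print(toy_ret.shift(-1).loc['2019-02-28'])    # -0.1: March's return, seen from the February formation date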
Same as Sharpe, but only accounting for downside risk def DownsideRet(returns): return returns.loc[returns<0] def Sortino(returns, down_ret): return np.sqrt(12) * returns.mean() / down_ret.std() #### Calculate the maximum drawdown and save to stats #### def MaxDD(cumret): Roll_Max = cumret.cummax() Monthly_Drawdown = cumret/Roll_Max - 1.0 return Monthly_Drawdown for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return_nor = round((AnnMeanMonthReturn(returns_nor[pos])*100),2) ann_return_swe = round((AnnMeanMonthReturn(returns_swe[pos])*100),2) ann_return_dk = round((AnnMeanMonthReturn(returns_dk[pos])*100),2) ann_return_fin = round((AnnMeanMonthReturn(returns_fin[pos])*100),2) stats.loc[market[0],stat[0]][pos,hold] = ann_return_nor stats.loc[market[1],stat[0]][pos,hold] = ann_return_swe stats.loc[market[2],stat[0]][pos,hold] = ann_return_dk stats.loc[market[3],stat[0]][pos,hold] = ann_return_fin # Calculate compound annual growth rate and save it in current stats dataframe cagr_nor = round((CAGR(cum_nor[pos])*100),2) cagr_swe = round((CAGR(cum_swe[pos])*100),2) cagr_dk = round((CAGR(cum_dk[pos])*100),2) cagr_fin = round((CAGR(cum_fin[pos])*100),2) stats.loc[market[0],stat[1]][pos,hold] = cagr_nor stats.loc[market[1],stat[1]][pos,hold] = cagr_swe stats.loc[market[2],stat[1]][pos,hold] = cagr_dk stats.loc[market[3],stat[1]][pos,hold] = cagr_fin # Calculate Annual STD and save to current stats dataframe ann_std_nor = round(AnnStd(returns_nor[pos])*100,2) ann_std_swe = round(AnnStd(returns_swe[pos])*100,2) ann_std_dk = round(AnnStd(returns_dk[pos])*100,2) ann_std_fin = round(AnnStd(returns_fin[pos])*100,2) stats.loc[market[0], stat[2]][pos,hold] = ann_std_nor stats.loc[market[1], stat[2]][pos,hold] = ann_std_swe stats.loc[market[2], stat[2]][pos,hold] = ann_std_dk stats.loc[market[3], stat[2]][pos,hold] = ann_std_fin # Calculate annual Sharpe ratio and save to current stats dataframe sharpe_nor = round(Sharpe(returns_nor[pos]),2) sharpe_swe = round(Sharpe(returns_swe[pos]),2) sharpe_dk = round(Sharpe(returns_dk[pos]),2) sharpe_fin = round(Sharpe(returns_fin[pos]),2) stats.loc[market[0], stat[3]][pos,hold] = sharpe_nor stats.loc[market[1], stat[3]][pos,hold] = sharpe_swe stats.loc[market[2], stat[3]][pos,hold] = sharpe_dk stats.loc[market[3], stat[3]][pos,hold] = sharpe_fin # Calculate Downsiderisk and Sortino ratio and save to current stats dataframe down_ret_nor = DownsideRet(returns_nor[pos]) down_ret_swe = DownsideRet(returns_swe[pos]) down_ret_dk = DownsideRet(returns_dk[pos]) down_ret_fin = DownsideRet(returns_fin[pos]) sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor),2) sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe),2) sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk),2) sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin),2) stats.loc[market[0], stat[4]][pos,hold] = sortino_nor stats.loc[market[1], stat[4]][pos,hold] = sortino_swe stats.loc[market[2], stat[4]][pos,hold] = sortino_dk stats.loc[market[3], stat[4]][pos,hold] = sortino_fin # Calculate Maximum Drawdown and save to current stats dataframe max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2) #only extract min value (maxDD) max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2) max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2) max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2) stats.loc[market[0], stat[5]][pos,hold] = max_dd_nor stats.loc[market[1], stat[5]][pos,hold] = max_dd_swe stats.loc[market[2], stat[5]][pos,hold] = 
max_dd_dk stats.loc[market[3], stat[5]][pos,hold] = max_dd_fin stats_bm = stats os.chdir(dir_df) #Save stats computed with raw returns (Cum, MDD) df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin] name_list_raw = ['cum_nor_sect_bm_H{}'.format(hold), 'cum_swe_sect_bm_H{}'.format(hold), 'cum_dk_sect_bm_H{}'.format(hold), 'cum_fin_sect_bm_H{}'.format(hold)] for i in range(len(df_list_raw)): df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl') #%% Cash-Flow-to-Market holding_period = [6] ###### Save stats to ####### position = ['Cheap (P10)', 'Expensive (P1)', 'Cash-Neutral (P10 - P1)'] market = ['Norway', 'Sweden', 'Denmark', 'Finland'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))), index=stat,columns=holding_period) df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position),axis=1, keys=position)) stats = pd.DataFrame(pd.concat([df_stats1]*len(market),axis=0, keys=market)) stats.columns.names = ['Portfolio','Holding'] ###### Transaction costs ###### trans_cost = 0.003 # (0.3%) def TCstep(returns): for posi in returns: for count in range(1,len(returns[posi].index),hold): month_ret = returns[posi][count] if month_ret > 0: returns[posi][count] = month_ret*(1-(trans_cost)) elif month_ret < 0: returns[posi][count] = month_ret*(1+(trans_cost)) return returns ####### iterate through the different basis periods #### ######## Load portfolios (dictionaries) from JSON ######### os.chdir(dir_cf) dict_dict = {'lp_nor', 'sp_nor', 'lp_swe', 'sp_swe','lp_dk', 'sp_dk','lp_fin', 'sp_fin'} for name in dict_dict: with open('Sect CFM {}.json'.format(name), 'r') as fp: locals()[name] = json.load(fp) lp_nor.pop('2019-12-31'); sp_nor.pop('2019-12-31'); lp_swe.pop('2019-12-31'); sp_swe.pop('2019-12-31'); lp_dk.pop('2019-12-31'); sp_dk.pop('2019-12-31'); lp_fin.pop('2019-12-31'); sp_fin.pop('2019-12-31'); ####### iterate through the different holding periods #### for hold in holding_period: # Delete all the between the holding periods def HoldPeriod(Portfolio): count=1 for key, values in list(Portfolio.items()): if count == 1: copy_port = Portfolio[key] count += 1 elif count < hold: Portfolio[key] = copy_port count += 1 elif count == hold: Portfolio[key] = copy_port count =1 return Portfolio long_port_nor_c = HoldPeriod(lp_nor.copy()) long_port_swe_c = HoldPeriod(lp_swe.copy()) long_port_dk_c = HoldPeriod(lp_dk.copy()) long_port_fin_c = HoldPeriod(lp_fin.copy()) short_port_nor_c = HoldPeriod(sp_nor.copy()) short_port_swe_c = HoldPeriod(sp_swe.copy()) short_port_dk_c = HoldPeriod(sp_dk.copy()) short_port_fin_c = HoldPeriod(sp_fin.copy()) ### Calculate simple returns ### def PortfolioReturns(prices_me, long, short): # Calculate the returns of all stocks returns = prices_me/prices_me.shift(1) - 1 # Create a dataframe to save the portfolio returns dates_invested = list(short.keys()) df_returns = pd.DataFrame(np.random.random_sample((len(dates_invested), 3)), index=dates_invested, columns=position) df_returns.index.names = ['date'] for i in range(len(dates_invested)): # Get current date and associated returns current_date = dates_invested[i] long_pos = long[current_date] short_pos = short[current_date] # Calculate the returns for the given month long_ret = returns.shift(-1).loc[current_date, long_pos].sum()/len(long_pos) short_ret = returns.shift(-1).loc[current_date, short_pos].sum()/len(short_pos) # Save to df_returns df_returns.loc[current_date, position[0]] = long_ret df_returns.loc[current_date, 
position[1]] = short_ret df_returns.loc[current_date, position[2]] = long_ret - short_ret return df_returns.shift(1) returns_nor = PortfolioReturns(me_nor.copy(), long_port_nor_c.copy(), short_port_nor_c.copy()) returns_swe = PortfolioReturns(me_swe.copy(), long_port_swe_c.copy(), short_port_swe_c.copy()) returns_dk = PortfolioReturns(me_dk.copy(), long_port_dk_c.copy(), short_port_dk_c.copy()) returns_fin = PortfolioReturns(me_fin.copy(), long_port_fin_c.copy(), short_port_fin_c.copy()) returns_nor = TCstep(returns_nor.copy()) returns_swe = TCstep(returns_swe.copy()) returns_dk = TCstep(returns_dk.copy()) returns_fin = TCstep(returns_fin.copy()) ''' def ExcRet(returns, rfr): excess_returns = pd.DataFrame() excess_returns[position[0]] = returns[position[0]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[1]] = returns[position[1]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] excess_returns[position[2]] = returns[position[2]]-rfr.loc[returns.index[0]:returns_nor.index[-1]] return excess_returns excret_nor = ExcRet(returns_nor.copy(), rfr['Norway']) excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden']) excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark']) excret_fin = ExcRet(returns_fin.copy(), rfr['US']) ''' returns_nor_100 = returns_nor * 100 returns_swe_100 = returns_swe * 100 returns_dk_100 = returns_dk * 100 returns_fin_100 = returns_fin * 100 ### Cumulative Returns ### def CumulativeReturns(simple_ret): cumret = (1 + simple_ret).cumprod() cumret.iloc[0] = 1 return cumret cum_nor = CumulativeReturns(returns_nor.copy()); cum_swe = CumulativeReturns(returns_swe.copy()); cum_dk = CumulativeReturns(returns_dk.copy()); cum_fin = CumulativeReturns(returns_fin.copy()); ## Calculate the average annual return ## def AnnMeanMonthReturn(returns): return returns.mean()*12 #### Calculate the componded annual growth rate #### def CAGR(cumulative): return (cumulative.iloc[-1])**(12/len(cumulative-1)) -1 #### Calculate the standard diviation of returns and save to stats #### def AnnStd(returns): return returns.std()*np.sqrt(12) def Sharpe(returns): return np.sqrt(12) * returns.mean() / returns.std() # Calculate Sortino ratio. 
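#### Toy check of the Sharpe helper above and the Sortino helper defined just below ####
# Hypothetical monthly returns, not thesis data. Both ratios annualize with sqrt(12);
# Sortino divides by the std of the negative months only, so it rewards strategies
# whose volatility comes mostly from the upside.
toy_r = pd.Series([0.02, -0.01, 0.03, -0.02, 0.01])
print(round(np.sqrt(12) * toy_r.mean() / toy_r.std(), 2))              # Sharpe ~ 1.0
print(round(np.sqrt(12) * toy_r.mean() / toy_r[toy_r < 0].std(), 2))   # Sortino ~ 2.94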
Same as Sharpe, but only accounting for downside risk def DownsideRet(returns): return returns.loc[returns<0] def Sortino(returns, down_ret): return np.sqrt(12) * returns.mean() / down_ret.std() #### Calculate the maximum drawdown and save to stats #### def MaxDD(cumret): Roll_Max = cumret.cummax() Monthly_Drawdown = cumret/Roll_Max - 1.0 return Monthly_Drawdown for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return_nor = round((AnnMeanMonthReturn(returns_nor[pos])*100),2) ann_return_swe = round((AnnMeanMonthReturn(returns_swe[pos])*100),2) ann_return_dk = round((AnnMeanMonthReturn(returns_dk[pos])*100),2) ann_return_fin = round((AnnMeanMonthReturn(returns_fin[pos])*100),2) stats.loc[market[0],stat[0]][pos,hold] = ann_return_nor stats.loc[market[1],stat[0]][pos,hold] = ann_return_swe stats.loc[market[2],stat[0]][pos,hold] = ann_return_dk stats.loc[market[3],stat[0]][pos,hold] = ann_return_fin # Calculate compound annual growth rate and save it in current stats dataframe cagr_nor = round((CAGR(cum_nor[pos])*100),2) cagr_swe = round((CAGR(cum_swe[pos])*100),2) cagr_dk = round((CAGR(cum_dk[pos])*100),2) cagr_fin = round((CAGR(cum_fin[pos])*100),2) stats.loc[market[0],stat[1]][pos,hold] = cagr_nor stats.loc[market[1],stat[1]][pos,hold] = cagr_swe stats.loc[market[2],stat[1]][pos,hold] = cagr_dk stats.loc[market[3],stat[1]][pos,hold] = cagr_fin # Calculate Annual STD and save to current stats dataframe ann_std_nor = round(AnnStd(returns_nor[pos])*100,2) ann_std_swe = round(AnnStd(returns_swe[pos])*100,2) ann_std_dk = round(AnnStd(returns_dk[pos])*100,2) ann_std_fin = round(AnnStd(returns_fin[pos])*100,2) stats.loc[market[0], stat[2]][pos,hold] = ann_std_nor stats.loc[market[1], stat[2]][pos,hold] = ann_std_swe stats.loc[market[2], stat[2]][pos,hold] = ann_std_dk stats.loc[market[3], stat[2]][pos,hold] = ann_std_fin # Calculate annual Sharpe ratio and save to current stats dataframe sharpe_nor = round(Sharpe(returns_nor[pos]),2) sharpe_swe = round(Sharpe(returns_swe[pos]),2) sharpe_dk = round(Sharpe(returns_dk[pos]),2) sharpe_fin = round(Sharpe(returns_fin[pos]),2) stats.loc[market[0], stat[3]][pos,hold] = sharpe_nor stats.loc[market[1], stat[3]][pos,hold] = sharpe_swe stats.loc[market[2], stat[3]][pos,hold] = sharpe_dk stats.loc[market[3], stat[3]][pos,hold] = sharpe_fin # Calculate Downsiderisk and Sortino ratio and save to current stats dataframe down_ret_nor = DownsideRet(returns_nor[pos]) down_ret_swe = DownsideRet(returns_swe[pos]) down_ret_dk = DownsideRet(returns_dk[pos]) down_ret_fin = DownsideRet(returns_fin[pos]) sortino_nor = round(Sortino(returns_nor[pos], down_ret_nor),2) sortino_swe = round(Sortino(returns_swe[pos], down_ret_swe),2) sortino_dk = round(Sortino(returns_dk[pos], down_ret_dk),2) sortino_fin = round(Sortino(returns_fin[pos], down_ret_fin),2) stats.loc[market[0], stat[4]][pos,hold] = sortino_nor stats.loc[market[1], stat[4]][pos,hold] = sortino_swe stats.loc[market[2], stat[4]][pos,hold] = sortino_dk stats.loc[market[3], stat[4]][pos,hold] = sortino_fin # Calculate Maximum Drawdown and save to current stats dataframe max_dd_nor = round(MaxDD(cum_nor[pos].copy()).min()*100, 2) #only extract min value (maxDD) max_dd_swe = round(MaxDD(cum_swe[pos].copy()).min()*100, 2) max_dd_dk = round(MaxDD(cum_dk[pos].copy()).min()*100, 2) max_dd_fin = round(MaxDD(cum_fin[pos].copy()).min()*100, 2) stats.loc[market[0], stat[5]][pos,hold] = max_dd_nor stats.loc[market[1], stat[5]][pos,hold] = max_dd_swe stats.loc[market[2], stat[5]][pos,hold] = 
max_dd_dk stats.loc[market[3], stat[5]][pos,hold] = max_dd_fin stats_cfm = stats os.chdir(dir_df) #Save stats computed with raw returns (Cum, MDD) df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin] name_list_raw = ['cum_nor_sect_cf_H{}'.format(hold), 'cum_swe_sect_cf_H{}'.format(hold), 'cum_dk_sect_cf_H{}'.format(hold), 'cum_fin_sect_cf_H{}'.format(hold)] for i in range(len(df_list_raw)): df_list_raw[i].to_pickle(name_list_raw[i]+'_raw.pkl') #%% LATEX CODE #### stats_mom.columns = stats_mom.columns.droplevel(1).droplevel(1) stats_bm.columns = stats_bm.columns.droplevel(1) stats_cfm.columns = stats_cfm.columns.droplevel(1) #%% os.chdir(dir_df_stats) stats_bm_ind = pd.read_pickle('stats_bm.pkl') strategy = ['Panel A: Momentum', 'Panel B: Book-to-Market', 'Panel C: CashFlow-to-Market'] latex_sect = pd.DataFrame(pd.concat([stats_mom, stats_bm, stats_cfm],axis=1, keys= strategy)) print(latex_sect.to_latex(multicolumn=True, multirow=True)) import os import numpy as np import pandas as pd import datetime as dt import matplotlib import matplotlib.pyplot as plt import seaborn as sns import math as math from math import exp, expm1 import re import cvxpy as cp #%% Importing the Returns dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes' #dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes' dir_df_ff = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes\\FF Data' #dir_df_ff = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes/FF Data' os.chdir(dir_df) #Make list of all countries and strategies returns country_list = ['nor', 'swe', 'dk', 'fin'] mom_dict = {}; bm_dict = {}; cf_dict = {}; msci_dict = {} for c in country_list: ret_mom = pd.read_pickle('returns_{}_mom_F6-H1_raw.pkl'.format(c)); mom_dict[c] = ret_mom ret_bm = pd.read_pickle('returns_{}_bm_H12_raw.pkl'.format(c)); bm_dict[c] = ret_bm ret_cf = pd.read_pickle('returns_{}_cf_H6_raw.pkl'.format(c)); cf_dict[c] = ret_cf ret_msci = pd.read_pickle('returns_{}_msci_F1-H1_raw.pkl'.format(c)); msci_dict[c] = ret_msci # Get a dataframe of all the countries def MultiIndexDF(dictionary): reform = {(outerKey, innerKey): values for outerKey, innerDict in dictionary.items() for innerKey, values in innerDict.items()} df = pd.DataFrame(reform) return df mom_all = MultiIndexDF(mom_dict); bm_all = MultiIndexDF(bm_dict) cf_all = MultiIndexDF(cf_dict); msci_all = MultiIndexDF(msci_dict) #%% country = 'dk' P10 = pd.concat([mom_all.loc[:,(country, 'Cash-Neutral (P10 - P1)')], bm_all.loc[:,(country, 'Cash-Neutral (P10 - P1)')], cf_all.loc[:,(country, 'Cash-Neutral (P10 - P1)')]], keys = ['MOM', 'BM', 'CF'], axis = 1).iloc[52:,:] P10.corr() #%% Get the equal Weight Nordic Portfolios weights_eq = np.array([1/len(country_list)]*4) def NordicRetEqW(Data, Portfolio, weights): return (Data[:].xs(Portfolio, level = 1, axis=1)*weights).sum(axis = 1) nordic_mom_eq = pd.concat([NordicRetEqW(mom_all.copy(), 'Winners (P10)', weights_eq), NordicRetEqW(mom_all.copy(), 'Losers (P1)', weights_eq), (NordicRetEqW(mom_all.copy(), 'Winners (P10)', weights_eq)-NordicRetEqW(mom_all.copy(), 'Losers (P1)', weights_eq))], keys = ['MOM (P10)','MOM (P1)','MOM (P10 - P1)'], axis = 1) nordic_bm_eq =
pd.concat([NordicRetEqW(bm_all.copy(), 'Cheap (P10)', weights_eq), NordicRetEqW(bm_all.copy(), 'Expensive (P1)', weights_eq), (NordicRetEqW(bm_all.copy(), 'Cheap (P10)', weights_eq)-NordicRetEqW(bm_all.copy(), 'Expensive (P1)', weights_eq))], keys = ['BM (P10)','BM (P1)','BM (P10 - P1)'], axis = 1) nordic_cf_eq = pd.concat([NordicRetEqW(cf_all.copy(), 'Cheap (P10)', weights_eq), NordicRetEqW(cf_all.copy(), 'Expensive (P1)', weights_eq), (NordicRetEqW(cf_all.copy(), 'Cheap (P10)', weights_eq)-NordicRetEqW(cf_all.copy(), 'Expensive (P1)', weights_eq))], keys = ['CF (P10)','CF (P1)','CF (P10 - P1)'], axis = 1) nordic_eq_weight = pd.concat([nordic_mom_eq, nordic_bm_eq, nordic_cf_eq], axis = 1) # %%%% Slice into Winners/Cheap and Losers/Expensive nordic_eq_w2 = nordic_eq_weight.copy() p10_nordic_eq = pd.concat([nordic_eq_w2.loc[:, 'MOM (P10)'], nordic_eq_w2.loc[:, 'BM (P10)'], nordic_eq_w2.loc[:, 'CF (P10)']], axis = 1).dropna(axis = 0) p1_nordic_start_eq = pd.concat([nordic_eq_w2.loc[:, 'MOM (P1)'], nordic_eq_w2.loc[:, 'BM (P1)'], nordic_eq_w2.loc[:, 'CF (P1)']], axis = 1).dropna(axis = 0) cn_nordic_start_eq = pd.concat([nordic_eq_w2.loc[:, 'MOM (P10 - P1)'], nordic_eq_w2.loc[:, 'BM (P10 - P1)'], nordic_eq_w2.loc[:, 'CF (P10 - P1)']], axis = 1).dropna(axis = 0) #%% Collect Fama & French data os.chdir(dir_df_ff) mom_df = pd.read_csv('Momentum .csv', sep=';', index_col=0); mom_df.index = pd.to_datetime(mom_df.index, format='%Y%m') bm_df = pd.read_csv('BM.csv', sep=';', index_col=0); bm_df.index = pd.to_datetime(bm_df.index, format='%Y%m') cf_df = pd.read_csv('CFP.csv', sep=';', index_col=0); cf_df.index = pd.to_datetime(cf_df.index, format='%Y%m') mom_cn = mom_df['Hi PRIOR'] - mom_df['Lo PRIOR']; bm_cn = bm_df['Hi 10'] - bm_df['Lo 10'] cf_cn = cf_df['Hi 10'] - cf_df['Lo 10'] mom = mom_df[['Hi PRIOR', 'Lo PRIOR']] mom_20 = pd.concat([mom.loc['1975-05-30':'1995-06-30'], mom_cn.loc['1975-05-30':'1995-06-30']], axis=1); mom_20 = mom_20.rename(columns={'Hi PRIOR':'P10', 'Lo PRIOR':'P1', 0:'MOM (P10 - P1)',}); mom_20 = mom_20/100 mom_30 = pd.concat([mom.loc['1965-05-30':'1995-06-30'], mom_cn.loc['1965-05-30':'1995-06-30']], axis=1); mom_30 = mom_30.rename(columns={'Hi PRIOR':'P10', 'Lo PRIOR':'P1', 0:'MOM (P10 - P1)',}); mom_30 = mom_30/100 bm = bm_df[['Hi 10', 'Lo 10']] bm_20 = pd.concat([bm.loc['1975-05-30':'1995-06-30'], bm_cn.loc['1975-05-30':'1995-06-30']], axis=1); bm_20 = bm_20.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'BM (P10 - P1)',}); bm_20 = bm_20/100 bm_30 = pd.concat([bm.loc['1965-05-30':'1995-06-30'], bm_cn.loc['1965-05-30':'1995-06-30']], axis=1); bm_30 = bm_30.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'BM (P10 - P1)',}); bm_30 = bm_30/100 cf = cf_df[['Hi 10', 'Lo 10']] cf_20 = pd.concat([cf.loc['1975-05-30':'1995-06-30'], cf_cn.loc['1975-05-30':'1995-06-30']], axis=1); cf_20 = cf_20.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'CF (P10 - P1)',}); cf_20 = cf_20/100 cf_30 = pd.concat([cf.loc['1965-05-30':'1995-06-30'], cf_cn.loc['1965-05-30':'1995-06-30']], axis=1); cf_30 = cf_30.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'CF (P10 - P1)',}); cf_30 = cf_30/100 #%% Slice into Winners/Cheap and Losers/Expensive cn_FF_20y = pd.concat([mom_20.loc[:, 'MOM (P10 - P1)'], bm_20.loc[:, 'BM (P10 - P1)'], cf_20.loc[:, 'CF (P10 - P1)']], axis = 1)#.dropna(axis = 0) cn_FF_30y = pd.concat([mom_30.loc[:, 'MOM (P10 - P1)'], bm_30.loc[:, 'BM (P10 - P1)'], cf_30.loc[:, 'CF (P10 - P1)']], axis = 1)#.dropna(axis = 0) #%% Kelly Optimalization on Portfolio level def KellyNordicEq(Data): Data = 
Data.reset_index(drop=True) V = Data.cov() V_inv = pd.DataFrame(np.linalg.pinv(V.values), V.columns, V.index) mean_ret = Data.mean(axis = 0) kelly_w = pd.DataFrame(0.25*np.dot(np.asanyarray(V_inv), np.asanyarray(mean_ret)), index = V.columns) for j in range(len(kelly_w)): if kelly_w.iloc[j,0] <= 0: kelly_w.iloc[j,0] = 0 return kelly_w #p10_nordic_strat_kelly = p10_nordic_eq.copy()*np.array(KellyNordicEq(p10_nordic_eq.copy())).T #p1_nordic_strat_kelly = p1_nordic_eq.copy()*np.array(KellyNordicEq(p1_nordic_eq.copy())).T cn_nordic_klly_20y_f = (cn_nordic_start_eq.copy()*np.array(KellyNordicEq(cn_FF_20y.copy())).T).sum(axis=1) cn_nordic_klly_30y_f = (cn_nordic_start_eq.copy()*np.array(KellyNordicEq(cn_FF_30y.copy())).T).sum(axis=1) cn_nordic_equal_f = (cn_nordic_start_eq*(1/3)).sum(axis=1) #%% os.chdir(dir_df) #Save stats computed with raw returns (Cum, MDD) df_list_raw = [cn_nordic_equal_f, cn_nordic_klly_20y_f, cn_nordic_klly_30y_f] name_list_raw = ['cn_nordic_equal_f', 'cn_nordic_klly_20y_f', 'cn_nordic_klly_30y_f'] for i in range(len(df_list_raw)): df_list_raw[i].to_pickle(name_list_raw[i]+'.pkl') import os import numpy as np import pandas as pd import datetime as dt import matplotlib import matplotlib.pyplot as plt import seaborn as sns import math as math from math import exp, expm1 import re dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes' dir_df_ff = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes\\FF Data' #dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes' os.chdir(dir_df) # read the pickled returns from dir_df nordic_cn_returns = pd.read_pickle('Nordic_cash_neutral_strategies.pkl') cn_nordic_strat_klly = pd.read_pickle('cn_nordic_strat_klly (1).pkl'); cn_nordic_strat_klly = cn_nordic_strat_klly.rename(columns={'MOM (P10 - P1)':'MOM', 'BM (P10 - P1)':'BM', 'CF (P10 - P1)':'CF'}) cn_nordic_country_klly = pd.read_pickle('nordic_country_klly_cn_half (1).pkl') #nordic_country_klly_cn_full = pd.read_pickle('nordic_country_klly_cn_full (1).pkl') #stats def CumRet(simple_ret): cumret = (1 + simple_ret).cumprod() for i in cumret.columns: if re.search('CF', i): cumret[i].iloc[51] = 1 else: cumret[i].iloc[0] = 1 return cumret ## Calculate the average annual return ## def AnnMeanMonthReturn(returns): return returns.mean()*12 #### Calculate the compounded annual growth rate #### def CAGR(cumulative): return (cumulative.iloc[-1])**(12/(len(cumulative)-1)) - 1 #### Calculate the standard deviation of returns and save to stats #### def AnnStd(returns): return returns.std()*np.sqrt(12) def Sharpe(returns): return np.sqrt(12) * returns.mean() / returns.std() # Calculate Sortino ratio.
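#### Looking back at KellyNordicEq above: a toy sketch of the fractional-Kelly weights ####
# Hypothetical return columns, not thesis data. The weights are 0.25 * V^-1 * mu
# (a quarter-Kelly bet), with negative weights floored at zero as in the clipping loop.
toy_rets = pd.DataFrame({'MOM': [0.02, -0.01, 0.03], 'BM': [0.01, 0.00, 0.02]})
toy_w = 0.25 * np.linalg.pinv(toy_rets.cov().values) @ toy_rets.mean().values
toy_w = np.clip(toy_w, 0, None)                           # long-only, as above
print(dict(zip(toy_rets.columns, toy_w.round(3))))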
Same as Sharpe, but only accounting for downside risk def DownsideRet(returns): return returns.loc[returns<0] def Sortino(returns, down_ret): return np.sqrt(12) * returns.mean() / down_ret.std() #### Calculate the maximum drawdown and save to stats #### def MaxDD(cumret): Roll_Max = cumret.cummax() Monthly_Drawdown = cumret/Roll_Max - 1.0 return Monthly_Drawdown strategies = [nordic_cn_returns, cn_nordic_strat_klly, cn_nordic_country_klly] stratname = ['Regular', 'Strategy Kelly', 'Country Kelly'] position = ['MOM', 'BM', 'CF'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(index=stat,columns=position) df_stats = pd.DataFrame(pd.concat([df_stats]*len(stratname),axis=1, keys=stratname)) for i in range(len(strategies)): for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return = round((AnnMeanMonthReturn(strategies[i][pos])*100),2) df_stats.loc[stat[0], stratname[i]][pos] = ann_return cagr = round((CAGR(CumRet(strategies[i])[pos])*100),2) df_stats.loc[stat[1], stratname[i]][pos] = cagr std = round(AnnStd(strategies[i][pos])*100,2) df_stats.loc[stat[2], stratname[i]][pos] = std sharpe = round(Sharpe(strategies[i][pos]),2) df_stats.loc[stat[3], stratname[i]][pos] = sharpe down_ret = DownsideRet(strategies[i][pos]) sortino = round(Sortino(strategies[i][pos], down_ret),2) df_stats.loc[stat[4], stratname[i]][pos] = sortino max_dd = round(MaxDD(CumRet(strategies[i])[pos]).min()*100, 2) #only extract min value (maxDD) df_stats.loc[stat[5], stratname[i]][pos] = max_dd print(df_stats.to_latex(multicolumn=False, multirow=False)) ############################################################################## ######################### FF DATA ########################################## ########################################################################### os.chdir(dir_df_ff) mom_df = pd.read_csv('Momentum .csv', sep=';', index_col=0); mom_df.index = pd.to_datetime(mom_df.index, format='%Y%m') bm_df = pd.read_csv('BM.csv', sep=';', index_col=0); bm_df.index = pd.to_datetime(bm_df.index, format='%Y%m') cf_df = pd.read_csv('CFP.csv', sep=';', index_col=0); cf_df.index = pd.to_datetime(cf_df.index, format='%Y%m') mom_cn = mom_df['Hi PRIOR'] - mom_df['Lo PRIOR']; bm_cn = bm_df['Hi 10'] - bm_df['Lo 10'] cf_cn = cf_df['Hi 10'] - cf_df['Lo 10'] mom = mom_df[['Hi PRIOR', 'Lo PRIOR']] mom = pd.concat([mom.loc['1991-02-01':'2019-12-01'], mom_cn.loc['1991-02-01':'2019-12-01']], axis=1); mom = mom.rename(columns={'Hi PRIOR':'P10', 'Lo PRIOR':'P1', 0:'C-N',}) mom = mom/100 bm = bm_df[['Hi 10', 'Lo 10']] bm = pd.concat([bm.loc['1991-02-01':'2019-12-01'], bm_cn.loc['1991-02-01':'2019-12-01']], axis=1); bm = bm.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'C-N',}) bm = bm/100 bm = bm_df[['Hi 10', 'Lo 10']] bm_10 = pd.concat([bm.loc['1985-05-30':'1995-06-30'], bm_cn.loc['1985-05-30':'1995-06-30']], axis=1); bm_10 = bm_10.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'C-N',}) bm_10 = bm_10/100 bm = bm_df[['Hi 10', 'Lo 10']] bm_20 = pd.concat([bm.loc['1975-05-30':'1995-06-30'], bm_cn.loc['1975-05-30':'1995-06-30']], axis=1); bm_20 = bm_20.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'C-N',}) bm_20 = bm_20/100 bm = bm_df[['Hi 10', 'Lo 10']] bm_30 = pd.concat([bm.loc['1965-05-30':'1995-06-30'], bm_cn.loc['1965-05-30':'1995-06-30']], axis=1); bm_30 = bm_30.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'C-N',}) bm_30 = bm_30/100 cf = cf_df[['Hi 10', 'Lo 10']] cf = pd.concat([cf.loc['1991-02-01':'2019-12-01'], 
cf_cn.loc['1991-02-01':'2019-12-01']], axis=1); cf = cf.rename(columns={'Hi 10':'P10', 'Lo 10':'P1', 0:'C-N',}) cf = cf/100 ############# perf calc ########### def CumRet(simple_ret): cumret = (1 + simple_ret).cumprod() cumret.iloc[0] = 1 return cumret strategies = [mom, bm_30, cf] position = ['P10', 'P1', 'C-N'] strat = ['MOM', 'BM', 'CF'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats_ff = pd.DataFrame(index=stat,columns=position) df_stats_ff = pd.concat([df_stats_ff]*len(strat), axis=1, keys=strat) for i in range(len(strategies)): for pos in position: # Calculate Annual Return and save it in current stats dataframe ann_return = round((AnnMeanMonthReturn(strategies[i][pos])*100),2) df_stats_ff.loc[stat[0], strat[i]][pos] = ann_return cagr = round((CAGR(CumRet(strategies[i])[pos])*100),2) df_stats_ff.loc[stat[1], strat[i]][pos] = cagr std = round(AnnStd(strategies[i][pos])*100,2) df_stats_ff.loc[stat[2], strat[i]][pos] = std sharpe = round(Sharpe(strategies[i][pos]),2) df_stats_ff.loc[stat[3], strat[i]][pos] = sharpe down_ret = DownsideRet(strategies[i][pos]) sortino = round(Sortino(strategies[i][pos], down_ret),2) df_stats_ff.loc[stat[4], strat[i]][pos] = sortino max_dd = round(MaxDD(CumRet(strategies[i])[pos]).min()*100, 2) #only extract min value (maxDD) df_stats_ff.loc[stat[5], strat[i]][pos] = max_dd print(df_stats_ff.to_latex(multicolumn=False, multirow=False))
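#### Minimal sketch of the Fama-French CSV handling used above (toy data via StringIO) ####
# The thesis files are semicolon-separated with a yyyymm integer index and returns quoted
# in percent; the same parse-and-rescale steps on a stand-in buffer:
from io import StringIO
toy_csv = StringIO("date;Hi PRIOR;Lo PRIOR\n199102;1.5;-0.8\n")
toy_ff = pd.read_csv(toy_csv, sep=';', index_col=0)
toy_ff.index = pd.to_datetime(toy_ff.index.astype(str), format='%Y%m')
print((toy_ff / 100).iloc[0].tolist())                    # [0.015, -0.008] as simple returns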
import os import pandas as pd import numpy as np ''' dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean' dir_cf = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/CF-to-Market' dir_sect = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios' ''' dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean\\' dir_cf = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B.
Dataanalyse\\Strategy\\Value\\CF-to-Market' os.chdir(dir_clean) ### Import uncleaned datasets ### sector_nor = pd.read_csv('Sector Norway clean.csv', sep=';', index_col=0); sector_nor = sector_nor.iloc[:,:2] sector_swe = pd.read_csv('Sector Sweden clean.csv', sep=',', index_col=0); sector_swe = sector_swe.iloc[:,:2] sector_dk = pd.read_csv('Sector Denmark clean.csv', sep=';', index_col=0); sector_dk = sector_dk.iloc[:,:2] sector_fin = pd.read_csv('Sector Finland clean.csv', sep=';', index_col=0); sector_fin = sector_fin.iloc[:,:2] tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index) tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index) tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index) tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0); tr_fin.index = pd.to_datetime(tr_fin.index) ## CHECK: EXISTS IN TWO COUNTRIES ## tr_swe['COMPONENTA'] tr_fin['COMPONENTA'] sector_swe.loc['COMPONENTA'] sector_fin.loc['COMPONENTA'] tr_swe['REALIA DEAD'] tr_dk['REALIA DEAD'] sector_swe.loc['REALIA DEAD'] sector_dk.loc['REALIA DEAD'] ### Change ONE OF THE VERSIONS #COMPONENTA tr_fin = tr_fin.rename(columns={'COMPONENTA':'COMPONENTA FIN'}) sector_fin = sector_fin.rename(index={'COMPONENTA':'COMPONENTA FIN'}) #REALIA DEAD tr_dk = tr_dk.rename(columns={'REALIA DEAD':'REALIA DEAD DK'}) sector_dk = sector_dk.rename(index={'REALIA DEAD':'REALIA DEAD DK'}) ### concatenate Sector to Economic Sectors def EconomicSector(sector): #Create column: Economic Sectors sector['ECONOMIC SECTOR'] = sector['SECTOR'] ### concatenate Sector to Economic Sectors sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Energy - Fossil Fuels)|(Renewable Energy)|(Uranium)','Energy') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Chemicals)|(Mineral Resources)|(Applied Resources)','Basic Materials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Industrial Goods)|(Industrial & Commercial Services)|(Industrial Conglomerates)|(Transportation)','Industrials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Automobiles & Auto Parts)|(Cyclical Consumer Products)|(Cyclical Consumer Services)|(Retailers)','Cyclical Consumer Goods & Services') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Food & Beverages)|(Personal & Household Products & Services)|(Food & Drug Retailing)','Non-Cyclical Consumer Goods & Services') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Banking & Investment Services)|(Insurance)|(Real Estate)|(Collective Investments)|(Holding Companies)' , 'Financials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Healthcare Services & Equipment)|(Pharmaceuticals & Medical Research)','Healthcare') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Technology Equipment)|(Software & IT Services)','Technology') return sector sector_nor_1 = EconomicSector(sector_nor.copy()) sector_swe_1 = EconomicSector(sector_swe.copy()) sector_dk_1 = EconomicSector(sector_dk.copy()) sector_fin_1 = EconomicSector(sector_fin.copy()) ## create df of all stocks and sectors tr_all = pd.concat([tr_nor, tr_swe, tr_dk, tr_fin], axis=1) sectors_all = pd.concat([sector_nor_1, sector_swe_1, sector_dk_1, sector_fin_1], axis=0) #################### Select which type of stats (ECONOMIC SECTOR, SECTOR or INDUSTRY) ################### stat_type = 'ECONOMIC SECTOR' 
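# --- Added sketch (not part of the original script): a minimal check of the
# EconomicSector() mapping on a toy sector table. The company names are invented;
# note that pandas >= 1.2 warns unless regex=True is passed to str.replace, which
# the calls above rely on implicitly.
_toy = pd.DataFrame({'SECTOR': ['Renewable Energy', 'Insurance', 'Transportation']},
                    index=['COMP A', 'COMP B', 'COMP C'])
print(EconomicSector(_toy.copy())['ECONOMIC SECTOR'].tolist())
# expected: ['Energy', 'Financials', 'Industrials']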
# stat_type above selects the preferred grouping
economic_sectors = list(sectors_all.groupby(stat_type).count().index)

# create dict with portfolios for each sector
sector_portfolios = {}
for j in range(len(economic_sectors)):
    sector_list = list()
    for i in range(len(sectors_all)):
        if sectors_all[stat_type][i] == economic_sectors[j]:
            sector_list.append(sectors_all.index[i])
    sector_portfolios[economic_sectors[j]] = sector_list

# create dict with averages for each stock in each sector
portfolio_mean = {}
for i in sector_portfolios:
    mean_list = list()
    for j in range(len(sector_portfolios[i])):
        x = tr_all[sector_portfolios[i][j]].dropna()
        if x.count() <= 1 or x.sum() == 0:
            continue
        else:
            mean_list.append(x.pct_change().mean())
    portfolio_mean[str(i)] = mean_list

# create sector average for all countries
sector_avg = pd.DataFrame(index=economic_sectors, columns=['Annual mean return'])
for i in portfolio_mean:
    df = pd.DataFrame(portfolio_mean[i])
    df = df.replace([np.inf, -np.inf], np.nan)
    x = round(df.mean()*12*100, 2)
    sector_avg.loc[i] = x[0]

# sort before plotting so the bars come out in order
sector_avg = sector_avg.sort_values(by='Annual mean return', ascending=False)
sector_avg.plot.barh()

import os
import pandas as pd
import numpy as np

'''
dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_cf = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Value/CF-to-Market'
dir_sect = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios'
'''
dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean\\'
dir_cf = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. 
Dataanalyse\\Strategy\\Value\\CF-to-Market' os.chdir(dir_clean) # IMPORT MV TO SORT OUT < 50% mv_nor_raw = pd.read_csv('MV Norway clean.csv', index_col=0); mv_swe_raw = pd.read_csv('MV Sweden clean.csv', index_col=0); mv_dk_raw = pd.read_csv('MV Denmark clean.csv', index_col=0); mv_fin_raw = pd.read_csv('MV Finland clean.csv', index_col=0); ### Import uncleaned datasets ### sector_nor = pd.read_csv('Sector Norway clean.csv', sep=';', index_col=0); sector_nor = sector_nor.iloc[:,:2] sector_swe = pd.read_csv('Sector Sweden clean.csv', sep=';', index_col=0); sector_swe = sector_swe.iloc[:,:2] sector_dk = pd.read_csv('Sector Denmark clean.csv', sep=';', index_col=0); sector_dk = sector_dk.iloc[:,:2] sector_fin = pd.read_csv('Sector Finland clean.csv', sep=';', index_col=0); sector_fin = sector_fin.iloc[:,:2] tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index) tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index) tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index) tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0); tr_fin.index = pd.to_datetime(tr_fin.index) ## CHECK: EXISTS IN TWO COUNTRIES ## tr_swe['COMPONENTA'] tr_fin['COMPONENTA'] sector_swe.loc['COMPONENTA'] sector_fin.loc['COMPONENTA'] mv_swe_raw['COMPONENTA'] mv_fin_raw['COMPONENTA'] tr_swe['REALIA DEAD'] tr_dk['REALIA DEAD'] sector_swe.loc['REALIA DEAD'] sector_dk.loc['REALIA DEAD'] mv_swe_raw['REALIA DEAD'] mv_dk_raw['REALIA DEAD'] ### Change ONE OF THE VERSIONS #COMPONENTA tr_fin = tr_fin.rename(columns={'COMPONENTA':'COMPONENTA FIN'}) sector_fin = sector_fin.rename(index={'COMPONENTA':'COMPONENTA FIN'}) mv_fin_raw = mv_fin_raw.rename(columns={'COMPONENTA':'COMPONENTA FIN'}) #REALIA DEAD tr_dk = tr_dk.rename(columns={'REALIA DEAD':'REALIA DEAD DK'}) sector_dk = sector_dk.rename(index={'REALIA DEAD':'REALIA DEAD DK'}) mv_dk_raw = mv_dk_raw.rename(columns={'REALIA DEAD':'REALIA DEAD DK'}) ### concatenate Sector to Economic Sectors def EconomicSector(sector): #Create column: Economic Sectors sector['ECONOMIC SECTOR'] = sector['SECTOR'] ### concatenate Sector to Economic Sectors sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Energy - Fossil Fuels)|(Renewable Energy)|(Uranium)','Energy') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Chemicals)|(Mineral Resources)|(Applied Resources)','Basic Materials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Industrial Goods)|(Industrial & Commercial Services)|(Industrial Conglomerates)|(Transportation)','Industrials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Automobiles & Auto Parts)|(Cyclical Consumer Products)|(Cyclical Consumer Services)|(Retailers)','Cyclical Consumer Goods & Services') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Food & Beverages)|(Personal & Household Products & Services)|(Food & Drug Retailing)','Non-Cyclical Consumer Goods & Services') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Banking & Investment Services)|(Insurance)|(Real Estate)|(Collective Investments)|(Holding Companies)' , 'Financials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Healthcare Services & Equipment)|(Pharmaceuticals & Medical Research)','Healthcare') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Technology Equipment)|(Software & IT Services)','Technology') return sector 
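# --- Added sketch (not part of the original script): how the decile screen in
# MV_quantiles() below works on one cross-section. pd.qcut splits the row of market
# values into ten equally populated deciles; everything at or below MV_decile
# (the smallest 50%) gets blanked out. The toy values are invented.
_mv_row = pd.Series(np.arange(1, 11) * 100.0, index=['s{}'.format(k) for k in range(10)])
_deciles = pd.qcut(_mv_row, 10, labels=np.arange(1, 11, 1))
print(_mv_row[_deciles.astype(int) > 5])  # only the five largest deciles survive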
# CREATE DF WITH ALL STOCKS
mv_all = pd.concat([mv_nor_raw, mv_swe_raw, mv_dk_raw, mv_fin_raw], axis=1)
sectors_all = pd.concat([EconomicSector(sector_nor), EconomicSector(sector_swe), EconomicSector(sector_dk), EconomicSector(sector_fin)], axis=0)

# REMOVE TR VALUES FOR STOCKS BELOW 50% MCAP
MV_decile = 5
def MV_quantiles(MV, tr):
    for i in MV.index:
        rowquantiles = pd.qcut(MV.loc[i,:], 10, labels=np.arange(1,11,1))
        for j in range(len(MV.loc[i])):
            if rowquantiles[MV.columns[j]] <= MV_decile:
                tr.loc[i,MV.columns[j]] = np.nan
            else:
                continue
    return tr

tr_nor_test = MV_quantiles(mv_nor_raw, tr_nor)
tr_swe_test = MV_quantiles(mv_swe_raw, tr_swe)
tr_dk_test = MV_quantiles(mv_dk_raw, tr_dk)
tr_fin_test = MV_quantiles(mv_fin_raw, tr_fin)
tr_all = pd.concat([tr_nor_test, tr_swe_test, tr_dk_test, tr_fin_test], axis=1)

####### CALCULATE SECTOR AVG #############
#################### Select which type of stats (ECONOMIC SECTOR, SECTOR or INDUSTRY) ###################
stat_type = 'ECONOMIC SECTOR'  # write the preferred grouping
economic_sectors = list(sectors_all.groupby(stat_type).count().index)

##start
countries = ['Norway', 'Sweden', 'Denmark', 'Finland', 'Nordic']
sectors = (sector_nor, sector_swe, sector_dk, sector_fin, sectors_all)
df_stats = pd.DataFrame(index=economic_sectors, columns=['Nr of stocks', 'Annual mean return'])
df_stats = pd.DataFrame(pd.concat([df_stats]*len(countries), axis=1, keys=countries))

for s in range(len(sectors)):
    # add economic sector to the original sector df
    sector = EconomicSector(sectors[s].copy())
    # create dict with portfolios for each sector
    sector_portfolios = {}
    for j in range(len(economic_sectors)):
        sector_list = list()
        for i in range(len(sector)):
            if sector[stat_type][i] == economic_sectors[j]:
                sector_list.append(sector.index[i])
        sector_portfolios[economic_sectors[j]] = sector_list
    # create df containing nr of stocks and mean for all (later)
    for k in sector_portfolios:
        dff = pd.DataFrame(index=sector_portfolios[k], columns=['yes'])
        for i in dff.index:
            if tr_all[i].sum() > 0:
                dff.loc[i] = 1
        df_stats[countries[s],'Nr of stocks'].loc[k] = dff['yes'].count()
    # create dict with averages for each stock in each sector
    portfolio_mean = {}
    for i in sector_portfolios:
        mean_list = list()
        for j in range(len(sector_portfolios[i])):
            x = tr_all[sector_portfolios[i][j]]  #.dropna()
            if x.count() <= 1 or x.sum() == 0:
                continue
            else:
                mean_list.append(x.pct_change().mean())
        portfolio_mean[str(i)] = mean_list
    # create the sector average for all countries; the results live in df_stats
    for i in portfolio_mean:
        df = pd.DataFrame(portfolio_mean[i])
        df = df.replace([np.inf, -np.inf], np.nan)
        x = round(df.mean()*12*100, 2)
        df_stats[countries[s], 'Annual mean return'].loc[i] = x[0]

import os
import pandas as pd
import json
import pickle
import numpy as np
import copy
from collections import Counter

# %% Data importing
dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
dir_port = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Sector and Industry stats/Portfolios'
dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes'
dir_sect = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Strategy/Sector and Industry stats/SectorData' #dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean' ### Import cleaned datasets ##### os.chdir(dir_clean) ### Import cleaned datasets ### sector_nor = pd.read_csv('Sector Norway clean.csv', sep=';', index_col=0); sector_nor = sector_nor.iloc[:,:2] sector_swe = pd.read_csv('Sector Sweden clean.csv', sep=';', index_col=0); sector_swe = sector_swe.iloc[:,:2] sector_dk = pd.read_csv('Sector Denmark clean.csv', sep=';', index_col=0); sector_dk = sector_dk.iloc[:,:2] sector_fin = pd.read_csv('Sector Finland clean.csv', sep=';', index_col=0); sector_fin = sector_fin.iloc[:,:2] nokusd = pd.read_csv(r'USDNOK clean.csv', index_col=0) sekusd = pd.read_csv(r'USDSEK clean.csv', index_col=0) dkkusd = pd.read_csv(r'USDDKK clean.csv', index_col=0) usdfmreur = pd.read_csv(r'USDFMREUR clean.csv', index_col=0) mv_nor = pd.read_csv('MV Norway clean.csv', index_col=0) mv_swe = pd.read_csv('MV Sweden clean.csv', index_col=0) mv_dk = pd.read_csv('MV Denmark clean.csv', index_col=0) mv_fin = pd.read_csv('MV Finland clean.csv', index_col=0) mv_nor1 = mv_nor.copy() * 1000000 mv_swe1 = mv_swe.copy() * 1000000 mv_dk1 = mv_dk.copy() * 1000000 mv_fin1 = mv_fin.copy() * 1000000 mv_nor2 = mv_nor1.iloc[:,:].div(nokusd.DEXNOUS, axis=0) mv_swe2 = mv_swe1.iloc[:,:].div(sekusd.DEXSDUS, axis=0) mv_dk2 = mv_dk1.iloc[:,:].div(dkkusd.DEXDNUS, axis=0) mv_fin2 = mv_fin1.iloc[:,:].div(usdfmreur.FN_EU_US, axis=0) # %% Sector data ### concatenate Sector to Economic Sectors def EconomicSector(sector): #Create column: Economic Sectors sector['ECONOMIC SECTOR'] = sector['SECTOR'] ### concatenate Sector to Economic Sectors sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Energy - Fossil Fuels)|(Renewable Energy)|(Uranium)','Energy') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Chemicals)|(Mineral Resources)|(Applied Resources)','Basic Materials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Industrial Goods)|(Industrial & Commercial Services)|(Industrial Conglomerates)|(Transportation)','Industrials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Automobiles & Auto Parts)|(Cyclical Consumer Products)|(Cyclical Consumer Services)|(Retailers)','Consumer Cycl.') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Food & Beverages)|(Personal & Household Products & Services)|(Food & Drug Retailing)','Consumer Non-Cycl.') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Banking & Investment Services)|(Insurance)|(Real Estate)|(Collective Investments)|(Holding Companies)' , 'Financials') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Healthcare Services & Equipment)|(Pharmaceuticals & Medical Research)','Healthcare') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Technology Equipment)|(Software & IT Services)','Technology') sector['ECONOMIC SECTOR'] = sector['ECONOMIC SECTOR'].str.replace('(Telecommunications Services)','Telecom') return sector sector_nor_1 = EconomicSector(sector_nor.copy()) sector_swe_1 = EconomicSector(sector_swe.copy()) sector_dk_1 = EconomicSector(sector_dk.copy()) sector_fin_1 = EconomicSector(sector_fin.copy()) # %% Count of each sector def GetComposition(nor, swe, dk, fin): composition = pd.concat([nor.count(), swe.count(), dk.count(), fin.count()], axis=1,keys=['Norway','Sweden','Denmark','Finland'], sort=False) return composition count_comp = 
GetComposition(sector_nor_1.copy().groupby('ECONOMIC SECTOR')['SECTOR'], sector_swe_1.copy().groupby('ECONOMIC SECTOR')['SECTOR'], sector_dk_1.copy().groupby('ECONOMIC SECTOR')['SECTOR'], sector_fin_1.copy().groupby('ECONOMIC SECTOR')['SECTOR']) # %% Getting sector of stocks we have invested in ## Load all portfolios and collect stocknames (non-duplicates) into a dictionary os.chdir(dir_port) strategy_dict = ['Mom 2-6M','Mom 2-12M', 'BM', 'CF'] port_dict = ['long_port', 'short_port'] market_dict = ['nor','swe','dk','fin'] nested_dict = {} for strat in strategy_dict: nested_dict[strat] = {} for market in market_dict: nested_dict[strat][market] = {} sector = locals()['sector_{}_1'.format(market)] for name in port_dict: nested_dict[strat][market][name] = {} with open('{} {}_{}.json'.format(strat, name, market), 'r') as fp: dated_stocks = json.load(fp) dated_stocks.pop('2019-12-31') for i in dated_stocks: for j in range(len(dated_stocks[i])): dated_stocks[i][j] = sector.loc[dated_stocks[i][j]]['ECONOMIC SECTOR'] nested_dict[strat][market][name] = dated_stocks sector_dict = copy.deepcopy(nested_dict) for strat in strategy_dict: for market in market_dict: for port in port_dict: flat_list = [] values = sector_dict[strat][market][port].values() for sublist in values: for item in sublist: flat_list.append(item) flat_df = pd.DataFrame(flat_list, columns=['ECONOMIC SECTOR']) sector_dict[strat][market][port] = flat_df['ECONOMIC SECTOR'].value_counts()/len(flat_df) # %% MV of 50% top def MV_quantiles(MV): MV_decile = 5 MV_red = MV.copy() for i in MV.index: rowquantiles = pd.qcut(MV.loc[i,:], 10, labels= np.arange(1,11,1)) for j in range(len(MV.loc[i])): if rowquantiles[MV.columns[j]] <= MV_decile: MV_red.loc[i,MV.columns[j]] = np.nan else: continue return MV_red mv_red_nor = MV_quantiles(mv_nor2.copy()); mv_red_swe = MV_quantiles(mv_swe2.copy()); mv_red_dk = MV_quantiles(mv_dk2.copy()); mv_red_fin = MV_quantiles(mv_fin2.copy()); # %% MV of each sector def MvComposition(sector, MV, year): sector = sector.loc[sector.index[sector.index.isin(MV.columns)],:] MV = MV.loc[:,MV.columns[MV.columns.isin(sector.index)]] stat_type = 'ECONOMIC SECTOR' #write preffered grouping economic_sectors = list(sector.groupby(stat_type).count().index) #create dict with portfolios for each sector sector_portfolios = {} for j in range(len(economic_sectors)): sector_list = list() for i in range(len(sector)): if sector[stat_type][i] == economic_sectors[j]: sector_list.append(sector.index[i]) sector_portfolios[economic_sectors[j]] = sector_list #create dict with market values for each stock in each sector sector_mv = {} for i in sector_portfolios: mv_sector = MV.loc[:,sector_portfolios[i]].iloc[year].sum()/MV.iloc[year].sum() sector_mv[str(i)] = mv_sector return sector_mv def GetMvComposition(sect_nor, sect_swe, sect_dk, sect_fin, MV_nor, MV_swe, MV_dk, MV_fin): mv_comp_90 = pd.concat([pd.DataFrame.from_dict(MvComposition(sect_nor, MV_nor, 0), orient='index'), pd.DataFrame.from_dict(MvComposition(sect_swe, MV_swe, 0), orient='index'), pd.DataFrame.from_dict(MvComposition(sect_dk, MV_dk, 0), orient='index'), pd.DataFrame.from_dict(MvComposition(sect_fin, MV_fin, 0), orient='index')], axis=1,keys=['Norway','Sweden','Denmark','Finland'], sort=False) mv_comp_19 = pd.concat([pd.DataFrame.from_dict(MvComposition(sect_nor, MV_nor, -1), orient='index'), pd.DataFrame.from_dict(MvComposition(sect_swe, MV_swe, -1), orient='index'), pd.DataFrame.from_dict(MvComposition(sect_dk, MV_dk, -1), orient='index'), 
pd.DataFrame.from_dict(MvComposition(sect_fin, MV_fin, -1), orient='index')],
                           axis=1, keys=['Norway','Sweden','Denmark','Finland'], sort=False)
    return pd.concat([mv_comp_90, mv_comp_19], axis=1, keys=['1990','2019'], sort=False)

mv_comp = GetMvComposition(sector_nor_1.copy(), sector_swe_1.copy(), sector_dk_1.copy(), sector_fin_1.copy(),
                           mv_red_nor.copy(), mv_red_swe.copy(), mv_red_dk.copy(), mv_red_fin.copy())

# %% Save Dataframes
os.chdir(dir_df)
count_comp.to_pickle('sector_count_composition.pkl')
mv_comp.to_pickle('sector_mv_composition.pkl')
# Store data (serialize)
with open('sector_dict.pickle', 'wb') as handle:
    pickle.dump(sector_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

os.chdir(dir_sect)
sector_nor_1.to_pickle('sector_nor.pkl')
sector_swe_1.to_pickle('sector_swe.pkl')
sector_dk_1.to_pickle('sector_dk.pkl')
sector_fin_1.to_pickle('sector_fin.pkl')

# %% SECTOR ALLOCATION FOR THE STRATEGIES OVER TIME
time_dict = copy.deepcopy(nested_dict)
allocation_dict = {}
for strat in time_dict:
    allocation_dict[strat] = {}
    for market in market_dict:
        allocation_dict[strat][market] = {}
        for port in port_dict:
            allocation_dict[strat][market][port] = {}
            for date in time_dict[strat][market][port]:
                # count the sector labels once per month and normalise them to portfolio weights
                counts = Counter(time_dict[strat][market][port][date])
                weights = np.array(list(counts.values()))/np.array(list(counts.values())).sum()
                alloc_monthly = pd.DataFrame(data=weights, index=list(counts.keys()), columns=['Sector'])
                allocation_dict[strat][market][port][date] = alloc_monthly

os.chdir(dir_df)
# Store data (serialize)
with open('allocation_dict.pickle', 'wb') as handle:
    pickle.dump(allocation_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

import os
import pandas as pd

### Import cleaned datasets #####
os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')
tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0)
tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0)
tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0)
tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0)

### Import uncleaned datasets ###
os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Data Raw/Close Price (End-of-Month)') p_nor = pd.read_csv('Price Norway.csv', sep=',', index_col=0); p_nor.index = pd.to_datetime(p_nor.index) p_swe = pd.read_csv('Price Sweden.csv', sep=',', index_col=0); p_swe.index = pd.to_datetime(p_swe.index) p_dk = pd.read_csv('Price Denmark.csv', sep=';', index_col=0); p_dk.index = pd.to_datetime(p_dk.index) p_fin = pd.read_csv('Price Finland.csv', sep=';', index_col=0); p_fin.index = pd.to_datetime(p_fin.index) ### Parse rows in uncleaned datasets with the ones in cleaned ones ##### prices_nor = p_nor.loc[p_nor.index[p_nor.index.isin(tr_nor.index)],:] prices_swe = p_swe.loc[p_swe.index[p_swe.index.isin(tr_swe.index)],:] prices_dk = p_dk.loc[p_dk.index[p_dk.index.isin(tr_dk.index)],:] prices_fin = p_fin.loc[p_fin.index[p_fin.index.isin(tr_fin.index)],:] # Delete all the columns that don't change (delisted before our start date) prices_nor = prices_nor.loc[:,(abs(prices_nor.pct_change()).sum(axis=0) != 0)] prices_swe = prices_swe.loc[:,(abs(prices_swe.pct_change()).sum(axis=0) != 0)] prices_dk = prices_dk.loc[:,(abs(prices_dk.pct_change()).sum(axis=0) != 0)] prices_fin = prices_fin.loc[:,(abs(prices_fin.pct_change()).sum(axis=0) != 0)] ######## Get only company names as column names ######## def CleanColNames(Data): Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '') return Data prices_nor1 = CleanColNames(prices_nor.copy()) prices_swe1 = CleanColNames(prices_swe.copy()) prices_dk1 = CleanColNames(prices_dk.copy()) prices_fin1 = CleanColNames(prices_fin.copy()) ### Parse columns in uncleaned datasets with the ones in cleaned ones ##### prices_nor2 = prices_nor1.loc[:,prices_nor1.columns[prices_nor1.columns.isin(tr_nor.columns)]] prices_swe2 = prices_swe1.loc[:,prices_swe1.columns[prices_swe1.columns.isin(tr_swe.columns)]] prices_dk2 = prices_dk1.loc[:,prices_dk1.columns[prices_dk1.columns.isin(tr_dk.columns)]] prices_fin2 = prices_fin1.loc[:,prices_fin1.columns[prices_fin1.columns.isin(tr_fin.columns)]] ### Delete all values not in clean dataset (repetition and before IPO) #### prices_nor3 = prices_nor2.sort_index(axis=1)[tr_nor.sort_index(axis=1).notna()] prices_swe3 = prices_swe2.sort_index(axis=1)[tr_swe.sort_index(axis=1).notna()] prices_dk3 = prices_dk2.sort_index(axis=1)[tr_dk.sort_index(axis=1).notna()] prices_fin3 = prices_fin2.sort_index(axis=1)[tr_fin.sort_index(axis=1).notna()] ######## Write clean datasets to CSV ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean') prices_nor3.to_csv(r'Price Norway ME clean.csv') prices_swe3.to_csv(r'Price Sweden ME clean.csv') prices_dk3.to_csv(r'Price Denmark ME clean.csv') prices_fin3.to_csv(r'Price Finland ME clean.csv') import os import pandas as pd ### Import cleaned datasets ##### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Data Clean') tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index) tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index) tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index) tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0);tr_fin.index = pd.to_datetime(tr_fin.index) tr_nor.index = tr_nor.index + pd.offsets.MonthBegin(0); tr_swe.index = tr_swe.index + pd.offsets.MonthBegin(0) tr_dk.index = tr_dk.index + pd.offsets.MonthBegin(0); tr_fin.index = tr_fin.index + pd.offsets.MonthBegin(0) ### Import uncleaned datasets ### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Close Price (Start-of-Month)') p_nor = pd.read_csv('Price Norway MS.csv', sep=',', index_col=0); p_nor.index = pd.to_datetime(p_nor.index, format='%Y-%m-%d') p_swe = pd.read_csv('Price Sweden MS.csv', sep=',', index_col=0); p_swe.index = pd.to_datetime(p_swe.index, format='%Y-%m-%d') p_dk = pd.read_csv('Price Denmark MS.csv', sep=',', index_col=0); p_dk.index = pd.to_datetime(p_dk.index, format='%Y-%m-%d') p_fin = pd.read_csv('Price Finland MS.csv', sep=',', index_col=0); p_fin.index = pd.to_datetime(p_fin.index, format='%Y-%m-%d') ### Parse rows in uncleaned datasets with the ones in cleaned ones ##### prices_nor = p_nor.loc[p_nor.index[p_nor.index.isin(tr_nor.index)],:] prices_swe = p_swe.loc[p_swe.index[p_swe.index.isin(tr_swe.index)],:] prices_dk = p_dk.loc[p_dk.index[p_dk.index.isin(tr_dk.index)],:] prices_fin = p_fin.loc[p_fin.index[p_fin.index.isin(tr_fin.index)],:] # Delete all the columns that don't change (delisted before our start date) prices_nor = prices_nor.loc[:,(abs(prices_nor.pct_change()).sum(axis=0) != 0)] prices_swe = prices_swe.loc[:,(abs(prices_swe.pct_change()).sum(axis=0) != 0)] prices_dk = prices_dk.loc[:,(abs(prices_dk.pct_change()).sum(axis=0) != 0)] prices_fin = prices_fin.loc[:,(abs(prices_fin.pct_change()).sum(axis=0) != 0)] ######## Get only company names as column names ######## def CleanColNames(Data): Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '') return Data prices_nor1 = CleanColNames(prices_nor.copy()) prices_swe1 = CleanColNames(prices_swe.copy()) prices_dk1 = CleanColNames(prices_dk.copy()) prices_fin1 = CleanColNames(prices_fin.copy()) ### Parse columns in uncleaned datasets with the ones in cleaned ones ##### prices_nor2 = prices_nor1.loc[:,prices_nor1.columns[prices_nor1.columns.isin(tr_nor.columns)]] prices_swe2 = prices_swe1.loc[:,prices_swe1.columns[prices_swe1.columns.isin(tr_swe.columns)]] prices_dk2 = prices_dk1.loc[:,prices_dk1.columns[prices_dk1.columns.isin(tr_dk.columns)]] prices_fin2 = prices_fin1.loc[:,prices_fin1.columns[prices_fin1.columns.isin(tr_fin.columns)]] ### Delete all values not in clean dataset (repetition and before IPO) #### prices_nor3 = prices_nor2.sort_index(axis=1)[tr_nor.sort_index(axis=1).notna()] prices_swe3 = prices_swe2.sort_index(axis=1)[tr_swe.sort_index(axis=1).notna()] prices_dk3 = prices_dk2.sort_index(axis=1)[tr_dk.sort_index(axis=1).notna()] prices_fin3 = prices_fin2.sort_index(axis=1)[tr_fin.sort_index(axis=1).notna()] ######## Write clean datasets to CSV ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Data Clean')
prices_nor3.to_csv(r'Price Norway MS clean.csv')
prices_swe3.to_csv(r'Price Sweden MS clean.csv')
prices_dk3.to_csv(r'Price Denmark MS clean.csv')
prices_fin3.to_csv(r'Price Finland MS clean.csv')

import os
import pandas as pd

dir_cf = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Free Cash Flow'
dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean'
#dir_cf = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Raw\\Free Cash Flow'
#dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean'

### Import cleaned datasets #####
os.chdir(dir_clean)
tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index)
tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index)
tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index)
tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0); tr_fin.index = pd.to_datetime(tr_fin.index)

### Rename the unmatched one in Denmark ###
tr_dk.rename(columns={"VELOXIS PHARMACEUTICALS": "VELOXIS PHARMACEUTICALS DEAD"}, inplace=True)

### Import uncleaned datasets ###
os.chdir(dir_cf)
#FCF_nor = pd.read_csv('FCF Norway.csv', sep=';', index_col=0); FCF_nor.index = pd.to_datetime(FCF_nor.index)
FCF_nor = pd.read_csv('FCF Norway new.csv', sep=';', index_col=0); FCF_nor.index = pd.to_datetime(FCF_nor.index)
FCF_swe = pd.read_csv('FCF Sweden new.csv', sep=';', index_col=0); FCF_swe.index = pd.to_datetime(FCF_swe.index)
FCF_dk = pd.read_csv('FCF Denmark new.csv', sep=';', index_col=0); FCF_dk.index = pd.to_datetime(FCF_dk.index)
FCF_fin = pd.read_csv('FCF Finland new.csv', sep=';', index_col=0); FCF_fin.index = pd.to_datetime(FCF_fin.index)

### Parse rows in uncleaned datasets with the ones in cleaned ones #####
FC_nor = FCF_nor.loc[FCF_nor.index[FCF_nor.index.isin(tr_nor.index)],:]
FC_swe = FCF_swe.loc[FCF_swe.index[FCF_swe.index.isin(tr_swe.index)],:]
FC_dk = FCF_dk.loc[FCF_dk.index[FCF_dk.index.isin(tr_dk.index)],:]
FC_fin = FCF_fin.loc[FCF_fin.index[FCF_fin.index.isin(tr_fin.index)],:]

######## Get only company names as column names ########
def CleanColNames(Data):
    Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '')
    return Data

FC_nor_1 = CleanColNames(FC_nor.copy())
FC_swe_1 = CleanColNames(FC_swe.copy())
FC_dk_1 = CleanColNames(FC_dk.copy())
FC_fin_1 = CleanColNames(FC_fin.copy())

### Parse columns in uncleaned datasets with the ones in cleaned ones #####
FC_nor_2 = FC_nor_1.loc[:,FC_nor_1.columns[FC_nor_1.columns.isin(tr_nor.columns)]]
FC_swe_2 = FC_swe_1.loc[:,FC_swe_1.columns[FC_swe_1.columns.isin(tr_swe.columns)]]
FC_dk_2 = FC_dk_1.loc[:,FC_dk_1.columns[FC_dk_1.columns.isin(tr_dk.columns)]]
FC_fin_2 = FC_fin_1.loc[:,FC_fin_1.columns[FC_fin_1.columns.isin(tr_fin.columns)]]

### Delete all values not in clean dataset (repetition and before IPO) ####
FC_nor_3 = FC_nor_2.sort_index(axis=1)[tr_nor.sort_index(axis=1).notna()]
FC_swe_3 = FC_swe_2.sort_index(axis=1)[tr_swe.sort_index(axis=1).notna()]
FC_dk_3 = FC_dk_2.sort_index(axis=1)[tr_dk.sort_index(axis=1).notna()]
FC_fin_3 = FC_fin_2.sort_index(axis=1)[tr_fin.sort_index(axis=1).notna()]

######## Write clean datasets to CSV #########
os.chdir(dir_clean)
FC_nor_3.to_csv(r'CF Norway clean new.csv')
FC_swe_3.to_csv(r'CF Sweden clean new.csv')
FC_dk_3.to_csv(r'CF 
Denmark clean new.csv') FC_fin_3.to_csv(r'CF Finland clean new.csv') import os import pandas as pd ### Import cleaned datasets ##### os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean') tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index) tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index) tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index) tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0); tr_fin.index = pd.to_datetime(tr_fin.index) ### Import uncleaned datasets ### os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Market Value (MCAP)') MCAP_nor = pd.read_csv('MV Norway.csv' , sep=',', index_col=0); MCAP_nor.index = pd.to_datetime(MCAP_nor.index) MCAP_swe = pd.read_csv('MV Sweden.csv' , sep=',', index_col=0); MCAP_swe.index = pd.to_datetime(MCAP_swe.index) MCAP_dk = pd.read_csv('MV Denmark.csv' , sep=',', index_col=0); MCAP_dk.index = pd.to_datetime(MCAP_dk.index) MCAP_fin = pd.read_csv('MV Finland.csv' , sep=',', index_col=0); MCAP_fin.index = pd.to_datetime(MCAP_fin.index) # Delete all the columns that don't change (delisted before our start date) MCAP_nor = MCAP_nor.loc[:,(abs(MCAP_nor.pct_change()).sum(axis=0) != 0)] MCAP_swe = MCAP_swe.loc[:,(abs(MCAP_swe.pct_change()).sum(axis=0) != 0)] MCAP_dk = MCAP_dk.loc[:,(abs(MCAP_dk.pct_change()).sum(axis=0) != 0)] MCAP_fin = MCAP_fin.loc[:,(abs(MCAP_fin.pct_change()).sum(axis=0) != 0)] ######## Get only company names as column names ######## def CleanColNames(Data): Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '') return Data MV_nor = CleanColNames(MCAP_nor.copy()) MV_swe = CleanColNames(MCAP_swe.copy()) MV_dk = CleanColNames(MCAP_dk.copy()) MV_fin = CleanColNames(MCAP_fin.copy()) ### Parse columns in uncleaned datasets with the ones in cleaned ones ##### MV_nor_1 = MV_nor.loc[:,MV_nor.columns[MV_nor.columns.isin(tr_nor.columns)]] MV_swe_1 = MV_swe.loc[:,MV_swe.columns[MV_swe.columns.isin(tr_swe.columns)]] MV_dk_1 = MV_dk.loc[:,MV_dk.columns[MV_dk.columns.isin(tr_dk.columns)]] MV_fin_1 = MV_fin.loc[:,MV_fin.columns[MV_fin.columns.isin(tr_fin.columns)]] ### Delete all values not in clean dataset (repetition and before IPO) #### MV_nor_2 = MV_nor_1.sort_index(axis=1)[tr_nor.sort_index(axis=1).notna()] MV_swe_2 = MV_swe_1.sort_index(axis=1)[tr_swe.sort_index(axis=1).notna()] MV_dk_2 = MV_dk_1.sort_index(axis=1)[tr_dk.sort_index(axis=1).notna()] MV_fin_2 = MV_fin_1.sort_index(axis=1)[tr_fin.sort_index(axis=1).notna()] ######## Write clean datasets to CSV ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Data Clean')
MV_nor_2.to_csv(r'MV Norway clean.csv')
MV_swe_2.to_csv(r'MV Sweden clean.csv')
MV_dk_2.to_csv(r'MV Denmark clean.csv')
MV_fin_2.to_csv(r'MV Finland clean.csv')

'''
### Parse rows in uncleaned datasets with the ones in cleaned ones #####
MV_nor = MCAP_nor.loc[MCAP_nor.index[MCAP_nor.index.isin(tr_nor.index)],:]
MV_swe = MCAP_swe.loc[MCAP_swe.index[MCAP_swe.index.isin(tr_swe.index)],:]
MV_dk = MCAP_dk.loc[MCAP_dk.index[MCAP_dk.index.isin(tr_dk.index)],:]
MV_fin = MCAP_fin.loc[MCAP_fin.index[MCAP_fin.index.isin(tr_fin.index)],:]

def ChangeToFloat(Data):
    for i in Data:
        for j in range(len(Data)):
            typee = type(Data[i][j])
            if typee == str:
                Data[i][j] = float(Data[i][j].replace(',','.'))
            else:
                continue
    return Data

MV_nor = ChangeToFloat(MV_nor)
MV_swe = ChangeToFloat(MV_swe)
MV_dk = ChangeToFloat(MV_dk)
MV_fin = ChangeToFloat(MV_fin)

os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Market Value (MCAP)')
MV_nor.to_csv(r'MV Norway.csv')
MV_swe.to_csv(r'MV Sweden.csv')
MV_dk.to_csv(r'MV Denmark.csv')
MV_fin.to_csv(r'MV Finland.csv')

### Rename the unmatched ones in Denmark ###
### This is not needed now, but keep the code
MCAP_nor.rename(columns={"NEXTGENTEL DEAD - DELIST.16/04/19 - MARKET VALUE":"NEXTGENTEL. DEAD - DELIST.16/04/19 - MARKET VALUE"}, inplace=True)
MCAP_swe.rename(columns={"COMEXCHANGE DEAD - DELIST 05/04/01 - MARKET VALUE":"COMEXCHANGE. DEAD - DELIST 05/04/01 - MARKET VALUE", "FABEGE 'B' DEAD - DELIST 02/12/97 - MARKET VALUE":"FABEGE 'B'. DEAD - DELIST 02/12/97 - MARKET VALUE", "FABEGE 'B' DEAD - TAKEOVER 505155 - MARKET VALUE":"FABEGE 'B'.. DEAD - TAKEOVER 505155 - MARKET VALUE"}, inplace=True)
'''

import os
import pandas as pd

### Import cleaned datasets #####
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')
tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index)
tr_swe = pd.read_csv('TR Sweden clean.csv', index_col=0); tr_swe.index = pd.to_datetime(tr_swe.index)
tr_dk = pd.read_csv('TR Denmark clean.csv', index_col=0); tr_dk.index = pd.to_datetime(tr_dk.index)
tr_fin = pd.read_csv('TR Finland clean.csv', index_col=0); tr_fin.index = pd.to_datetime(tr_fin.index)

### Import cleaned market value datasets ###
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')
MV_nor = pd.read_csv('MV Norway clean.csv', index_col=0); MV_nor.index = pd.to_datetime(MV_nor.index)
MV_swe = pd.read_csv('MV Sweden clean.csv', index_col=0); MV_swe.index = pd.to_datetime(MV_swe.index)
MV_dk = pd.read_csv('MV Denmark clean.csv', index_col=0); MV_dk.index = pd.to_datetime(MV_dk.index)
MV_fin = pd.read_csv('MV Finland clean.csv', index_col=0); MV_fin.index = pd.to_datetime(MV_fin.index)
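# --- Added sketch (not part of the original scripts): the retired ChangeToFloat helper
# in the commented-out block above converts decimal commas cell by cell; the same result
# can be had at read time with the decimal=',' argument of read_csv. The inline CSV
# below is a made-up two-row example.
from io import StringIO
_demo = pd.read_csv(StringIO('name;mv\nA;1,5\nB;2,25'), sep=';', decimal=',', index_col=0)
print(_demo['mv'].dtype)  # float64: the comma decimals are parsed directly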
### Import raw market-to-book datasets ###
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Market-to-Book Ratio')
BM_nor = pd.read_csv('MB Norway.csv', sep=';', index_col=0); BM_nor.index = pd.to_datetime(BM_nor.index)
BM_swe = pd.read_csv('MB Sweden.csv', sep=';', index_col=0); BM_swe.index = pd.to_datetime(BM_swe.index)
BM_dk = pd.read_csv('MB Denmark.csv', sep=';', index_col=0); BM_dk.index = pd.to_datetime(BM_dk.index)
BM_fin = pd.read_csv('MB Finland.csv', sep=';', index_col=0); BM_fin.index = pd.to_datetime(BM_fin.index)

### Rename the duplicates ###
BM_nor.rename(columns={"NEXTGENTEL DEAD - DELIST.16/04/19 - MRKT VALUE TO BOOK":"NEXTGENTEL. DEAD - DELIST.16/04/19 - MRKT VALUE TO BOOK"}, inplace=True)
BM_swe.rename(columns={"FABEGE 'B' DEAD - DELIST 02/12/97 - MRKT VALUE TO BOOK":"FABEGE 'B'. DEAD - DELIST 02/12/97 - MRKT VALUE TO BOOK", "FABEGE 'B' DEAD - TAKEOVER 505155 - MRKT VALUE TO BOOK":"FABEGE 'B'.. DEAD - TAKEOVER 505155 - MRKT VALUE TO BOOK"}, inplace=True)

### Parse rows in uncleaned datasets with the ones in cleaned ones #####
BM_nor = BM_nor.loc[BM_nor.index[BM_nor.index.isin(tr_nor.index)],:]
BM_swe = BM_swe.loc[BM_swe.index[BM_swe.index.isin(tr_swe.index)],:]
BM_dk = BM_dk.loc[BM_dk.index[BM_dk.index.isin(tr_dk.index)],:]
BM_fin = BM_fin.loc[BM_fin.index[BM_fin.index.isin(tr_fin.index)],:]

# Delete all the columns that don't change (delisted before our start date)
BM_nor = BM_nor.loc[:,(abs(BM_nor.pct_change()).sum(axis=0) != 0)]
BM_swe = BM_swe.loc[:,(abs(BM_swe.pct_change()).sum(axis=0) != 0)]
BM_dk = BM_dk.loc[:,(abs(BM_dk.pct_change()).sum(axis=0) != 0)]
BM_fin = BM_fin.loc[:,(abs(BM_fin.pct_change()).sum(axis=0) != 0)]

######## Get only company names as column names ########
def CleanColNames(Data):
    Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '')
    return Data

BM_nor_1 = CleanColNames(BM_nor.copy())
BM_swe_1 = CleanColNames(BM_swe.copy())
BM_dk_1 = CleanColNames(BM_dk.copy())
BM_fin_1 = CleanColNames(BM_fin.copy())

### Parse columns in uncleaned datasets with the ones in cleaned ones #####
BM_nor_2 = BM_nor_1.loc[:,BM_nor_1.columns[BM_nor_1.columns.isin(tr_nor.columns)]]
BM_swe_2 = BM_swe_1.loc[:,BM_swe_1.columns[BM_swe_1.columns.isin(tr_swe.columns)]]
BM_dk_2 = BM_dk_1.loc[:,BM_dk_1.columns[BM_dk_1.columns.isin(tr_dk.columns)]]
BM_fin_2 = BM_fin_1.loc[:,BM_fin_1.columns[BM_fin_1.columns.isin(tr_fin.columns)]]

### Delete all values not in clean dataset (repetition and before IPO) ####
BM_nor_3 = BM_nor_2.sort_index(axis=1)[tr_nor.sort_index(axis=1).notna()]
BM_swe_3 = BM_swe_2.sort_index(axis=1)[tr_swe.sort_index(axis=1).notna()]
BM_dk_3 = BM_dk_2.sort_index(axis=1)[tr_dk.sort_index(axis=1).notna()]
BM_fin_3 = BM_fin_2.sort_index(axis=1)[tr_fin.sort_index(axis=1).notna()]

### Parse columns in the BM datasets with the ones in MV #####
BM_nor_4 = BM_nor_3.loc[:,BM_nor_3.columns[BM_nor_3.columns.isin(MV_nor.columns)]]
BM_swe_4 = BM_swe_3.loc[:,BM_swe_3.columns[BM_swe_3.columns.isin(MV_swe.columns)]]
BM_dk_4 = BM_dk_3.loc[:,BM_dk_3.columns[BM_dk_3.columns.isin(MV_dk.columns)]]
BM_fin_4 = BM_fin_3.loc[:,BM_fin_3.columns[BM_fin_3.columns.isin(MV_fin.columns)]]

### Parse columns in MV with the ones in the BM datasets #####
MV_nor_2 = MV_nor.loc[:,MV_nor.columns[MV_nor.columns.isin(BM_nor_4.columns)]]
MV_swe_2 = MV_swe.loc[:,MV_swe.columns[MV_swe.columns.isin(BM_swe_4.columns)]]
MV_dk_2 = MV_dk.loc[:,MV_dk.columns[MV_dk.columns.isin(BM_dk_4.columns)]]
MV_fin_2 = MV_fin.loc[:,MV_fin.columns[MV_fin.columns.isin(BM_fin_4.columns)]]

### Calculate the book value: the raw data are market-to-book, so BV = MV / (M/B)
BV_nor = MV_nor_2/BM_nor_4
BV_swe = MV_swe_2/BM_swe_4
BV_dk = MV_dk_2/BM_dk_4
BV_fin = MV_fin_2/BM_fin_4 ######## Write clean datasets to CSV ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean') BV_nor.to_csv(r'BV Norway clean.csv') BV_swe.to_csv(r'BV Sweden clean.csv') BV_dk.to_csv(r'BV Denmark clean.csv') BV_fin.to_csv(r'BV Finland clean.csv') import re import os import numpy as np import pandas as pd import datetime as dt from plotly.offline import plot import plotly.graph_objs as go ########### Collect the Total Return Index Data ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Total Return Index') tr_nor = pd.read_excel('TR Norway ME.xlsx', index_col=0) tr_swe = pd.read_excel('TR Sweden ME.xlsx', index_col=0) tr_dk = pd.read_excel('TR Denmark ME.xlsx', index_col=0) tr_fin = pd.read_excel('TR Finland ME.xlsx', index_col=0) ########### Collect the IPO Data ######### os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/IPO date') IPO_nor = pd.read_csv('IPO Norway.csv', sep=';') IPO_swe = pd.read_csv('IPO Sweden.csv', sep=';') IPO_dk = pd.read_csv('IPO Denmark.csv', sep=';') IPO_fin = pd.read_csv('IPO Finland.csv', sep=';') ########### Change directory back again ############## os.getcwd() os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Total Return Index') ####### Remove last and first (errors with $ signs) ########### tr_nor = tr_nor.replace('NAN', np.nan).iloc[1:-2] tr_swe = tr_swe.replace('NAN', np.nan).iloc[1:-2] tr_dk = tr_dk.replace('NAN', np.nan).iloc[1:-1] tr_fin= tr_fin.replace('NAN', np.nan).iloc[1:-1] # Delete all the columns with just NA (had #Error column names) tr_nor.dropna(axis='columns', how='all', inplace=True) tr_swe.dropna(axis='columns', how='all', inplace=True) tr_dk.dropna(axis='columns', how='all', inplace=True) tr_fin.dropna(axis='columns', how='all', inplace=True) # Delete all the columns that don't change (delisted before our start date) tr_nor = tr_nor.loc[:,(abs(tr_nor.pct_change()).sum(axis=0) != 0)] tr_swe = tr_swe.loc[:,(abs(tr_swe.pct_change()).sum(axis=0) != 0)] tr_dk = tr_dk.loc[:,(abs(tr_dk.pct_change()).sum(axis=0) != 0)] tr_fin = tr_fin.loc[:,(abs(tr_fin.pct_change()).sum(axis=0) != 0)] last_row_nor = tr_nor.iloc[-1, :] last_row_swe = tr_swe.iloc[-1, :] last_row_dk = tr_dk.iloc[-1, :] last_row_fin = tr_fin.iloc[-1, :] def fix_lastrows(data_fix): last_row = data_fix.iloc[-1, :] count_column = 0 for column in data_fix.columns: df_current = data_fix.loc[:,column] validation_value = last_row[count_column] if count_column < (len(data_fix.columns)): for i in reversed(range(len(df_current))): # start with the last column if (data_fix.iloc[i, count_column] == validation_value): # chech if current value equals last value data_fix.iloc[i , count_column] = np.nan else: break count_column += 1 else: break return data_fix tr_nor_fix = fix_lastrows(tr_nor.copy()) tr_swe_fix = fix_lastrows(tr_swe.copy()) tr_dk_fix = fix_lastrows(tr_dk.copy()) tr_fin_fix = fix_lastrows(tr_fin.copy()) def add_last_row(Data, last_row): for i in Data: last_row_2 = Data.loc[Data.index[-2], i] if np.isnan(last_row_2): continue else: Data.loc[Data.index[-1], i] = last_row[i] return Data tr_nor_1 = add_last_row(tr_nor_fix, last_row_nor) tr_swe_1 = add_last_row(tr_swe_fix, last_row_swe) tr_dk_1 = add_last_row(tr_dk_fix, 
last_row_dk)
tr_fin_1 = add_last_row(tr_fin_fix, last_row_fin)

######## Delete unnamed columns ########
def RemoveUnamed(Data):
    for col in Data.columns:
        if re.findall('Unnamed', col):
            Data.drop([col], axis=1, inplace=True)
        else:
            continue
    return Data

tr_nor_2 = RemoveUnamed(tr_nor_1.copy())
tr_swe_2 = RemoveUnamed(tr_swe_1.copy())
tr_dk_2 = RemoveUnamed(tr_dk_1.copy())
tr_fin_2 = RemoveUnamed(tr_fin_1.copy())

######## Get only company names (+DEAD) as column names ########
def CleanColNames(Data):
    Data.columns = Data.columns.str.replace('[ ](-)[ ].*', '')
    return Data

tr_nor_3 = CleanColNames(tr_nor_2.copy())
tr_swe_3 = CleanColNames(tr_swe_2.copy())
tr_dk_3 = CleanColNames(tr_dk_2.copy())
tr_fin_3 = CleanColNames(tr_fin_2.copy())

######### Retrieve company names ######################
def cleanIpo(Data):
    # strip legal-form suffixes (duplicated alternatives in the original pattern removed)
    clean_list = str('[ ](ASA)|[ ](Plc)|[ ](PLC)|[ ](Oyj)|[ ](P/F)|[ ](Ltd)|[ ](Inc)|'
                     '[ ](SE)|[ ](As)|[ ](A/S)|[ ](AS)|[ ](SA)|[ ](Abp)|[ ](AB)|[ ](Ab)|'
                     '[ ](AG)|[ ](\(Publ\))|[ ](\(publ\))')
    Data['Company Common Name'] = Data['Company Common Name'].str.replace(clean_list, '')
    Data['Company Common Name'] = Data['Company Common Name'].str.upper()
    return Data

IPO_nor_1 = cleanIpo(IPO_nor.copy())
IPO_swe_1 = cleanIpo(IPO_swe.copy())
IPO_dk_1 = cleanIpo(IPO_dk.copy())
IPO_fin_1 = cleanIpo(IPO_fin.copy())

########## Delete values prior to IPO date #####################
### KAHOOT! is missing the ! in the name #########
IPO_nor_1.iloc[135,0] = 'KAHOOT'

def IPO_date(Data, IPO_data):
    for i in Data:
        ticker_tr = i
        for j in IPO_data.index:
            ticker_ipo = IPO_data['Company Common Name'][j]
            #ticker_ipo_dead = str(ticker_ipo + ' DEAD')
            ipo_date_str = str(IPO_data['IPO Date'][j])
            if ipo_date_str == 'nan':
                continue
            else:
                if ticker_tr == ticker_ipo:
                    ipo_date_obj = pd.to_datetime(IPO_data['IPO Date'][j])
                    ipo_date_me = ipo_date_obj.to_period('M').to_timestamp('M')
                    ipo_ms = ipo_date_me.replace(day=1)
                    prev_ipo_date_me = ipo_ms - dt.timedelta(days=1)
                    Data[ticker_tr][prev_ipo_date_me] = 99999  # sentinel: the month-end right before the IPO
                #elif ticker_tr == ticker_ipo_dead:
                #    Data[ticker_tr][prev_ipo_date_me] = 99999
                else:
                    continue
    return Data

tr_nor_4 = IPO_date(tr_nor_3.copy(), IPO_nor_1.copy())
tr_dk_4 = IPO_date(tr_dk_3.copy(), IPO_dk_1.copy())
tr_swe_4 = IPO_date(tr_swe_3.copy(), IPO_swe_1.copy())
tr_fin_4 = IPO_date(tr_fin_3.copy(), IPO_fin_1.copy())

def remove_ipo_cols(Data):
    for c in Data:
        for i in reversed(range(len(Data[c]))):
            if Data[c][i] == 99999:
                row = i
                Data[c][row] = np.nan
                for j in reversed(range(row)):
                    Data[c][j] = np.nan
            else:
                continue
    return Data

tr_nor_4 = remove_ipo_cols(tr_nor_4.copy())
tr_dk_4 = remove_ipo_cols(tr_dk_4.copy())
tr_swe_4 = remove_ipo_cols(tr_swe_4.copy())
tr_fin_4 = remove_ipo_cols(tr_fin_4.copy())
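# --- Added sketch (not part of the original script): how the 99999 sentinel planted by
# IPO_date() and consumed by remove_ipo_cols() works on one toy column. The values are
# invented; 99999 marks the month-end just before the IPO.
_s = pd.Series([3.0, 99999.0, 5.0, 6.0])
_hit = int(_s[_s == 99999].index[0])
_s.iloc[:_hit + 1] = np.nan   # blank the sentinel and every observation before it
print(_s.tolist())            # [nan, nan, 5.0, 6.0]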
#### Final cleaning #####
### Import clean data sets #####
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')
prices_nor = pd.read_csv('Price Norway ME clean.csv', index_col=0)
prices_swe = pd.read_csv('Price Sweden ME clean.csv', index_col=0)
prices_dk = pd.read_csv('Price Denmark ME clean.csv', index_col=0)
prices_fin = pd.read_csv('Price Finland ME clean.csv', index_col=0)

### Parse columns in cleaned tr- with the ones in cleaned prices #####
tr_nor_4 = tr_nor_4.loc[:,tr_nor_4.columns[tr_nor_4.columns.isin(prices_nor.columns)]]
tr_swe_4 = tr_swe_4.loc[:,tr_swe_4.columns[tr_swe_4.columns.isin(prices_swe.columns)]]
tr_dk_4 = tr_dk_4.loc[:,tr_dk_4.columns[tr_dk_4.columns.isin(prices_dk.columns)]]
tr_fin_4 = tr_fin_4.loc[:,tr_fin_4.columns[tr_fin_4.columns.isin(prices_fin.columns)]]

######## Write to CSV #########
os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')
# lower-case 'clean' so the file names match what the other scripts read back in
tr_nor_4.to_csv(r'TR Norway clean.csv')
tr_swe_4.to_csv(r'TR Sweden clean.csv')
tr_dk_4.to_csv(r'TR Denmark clean.csv')
tr_fin_4.to_csv(r'TR Finland clean.csv')

import os
import pandas as pd

os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Valuta')

######################### Exchange Rates #########################
usdnok = pd.read_csv('NOK_USD.csv', index_col=0); usdnok.index = pd.to_datetime(usdnok.index)
usdsek = pd.read_csv('SEK_USD.csv', index_col=0); usdsek.index = pd.to_datetime(usdsek.index)
usddkk = pd.read_csv('DKK_USD.csv', index_col=0); usddkk.index = pd.to_datetime(usddkk.index)
eurusd = pd.read_csv('EUR_USD.csv', index_col=0); eurusd.index = pd.to_datetime(eurusd.index)
usdfmr = pd.read_csv('FMR_USD.csv', index_col=0); usdfmr.index = pd.to_datetime(usdfmr.index)

os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean')

######################### Original Universe dataframe #########################
mv_nor = pd.read_csv('MV Norway clean.csv', index_col=0)
mv_swe = pd.read_csv('MV Sweden clean.csv', index_col=0)
mv_dk = pd.read_csv('MV Denmark clean.csv', index_col=0)
mv_fin = pd.read_csv('MV Finland clean.csv', index_col=0)

## Slice on the dates we have MV for, and change '.' values to the value one day before
def Clean(ExR, MV):
    ExR1 = ExR.loc[ExR.index[ExR.index.isin(MV.index)],:]
    ExR1[ExR1.iloc[:,0] == '.'] = ExR.shift(1).loc[ExR1[ExR1.iloc[:,0] == '.'].index,:]
    return ExR1

nokusd1 = Clean(usdnok.copy(), mv_nor.copy())
sekusd1 = Clean(usdsek.copy(), mv_swe.copy())
dkkusd1 = Clean(usddkk.copy(), mv_dk.copy())
eurusd1 = Clean(eurusd.copy(), mv_fin.copy())

# Change 1st-of-month dates to the last day of the previous month
# ('1990-03-01') --> ('1990-02-28')
usdfmr = usdfmr.iloc[230:, :]
usdfmr.index = nokusd1.iloc[:len(usdfmr),:].index
usdfmr1 = Clean(usdfmr.copy(), mv_fin.copy())

# change from euro/usd to usd/euro (i.e. to 1 usd = xEUR)
nokusd2 = nokusd1.astype(float)
sekusd2 = sekusd1.astype(float)
dkkusd2 = dkkusd1.astype(float)
usdeur = 1/eurusd1.astype(float)
usdfmr2 = usdfmr1.astype(float)

# Concatenate the Finnish markka and the euro into one series
usdfmreur = pd.concat([usdfmr2.rename(columns={'EXFNUS':'FN_EU_US'}), usdeur.rename(columns={'DEXUSEU':'FN_EU_US'}).iloc[35:,:]])

os.getcwd()
os.chdir('/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. 
Dataanalyse/Data Clean') nokusd2.to_csv(r'USDNOK clean.csv') sekusd2.to_csv(r'USDSEK clean.csv') dkkusd2.to_csv(r'USDDKK clean.csv') usdfmreur.to_csv(r'USDFMREUR clean.csv') import os import pandas as pd import datetime as dt os.chdir('C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Raw\\Risk Free Rate') #load 10 year gov bonds all countries rfr = pd.read_csv('10ygovbonds.csv', index_col=0); rfr.index = pd.to_datetime(rfr.index); os.chdir('C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean') tr_nor = pd.read_csv('TR Norway clean.csv', index_col=0); tr_nor.index = pd.to_datetime(tr_nor.index); #change name rfr = rfr.rename(columns={'IRLTLT01NOM156N':'rfr_nor', 'IRLTLT01SEM156N':'rfr_swe', 'IRLTLT01DKM156N':'rfr_dk', 'IRLTLT01FIM156N':'rfr_fin'}) date_time_str = '2020-01-01 00:00:00' date_time_obj = dt.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S') last_val = pd.DataFrame({'rfr_nor': [1.5], 'rfr_swe': [1.5], 'rfr_dk': [1.5], 'rfr_fin': [1.5]},index=[date_time_obj]) rfr = rfr.append(last_val) rfr_m = rfr.resample('M').ffill().reset_index() rfr_m = rfr_m.set_index('index') rfr_m = (1+ rfr_m/100)**(1/12)-1 #match index with tr nor rfr_m = rfr_m.loc['1990-02-28 00:00:00':'2019-12-31 00:00:00'] rfr_m.index = tr_nor.index os.chdir('C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean') rfr_m.to_csv('rfr_monthly.csv') import os import numpy as np import pandas as pd import datetime as dt dir_idx = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Raw/Index' dir_clean = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Data Clean' dir_df = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Plot code/Dataframes' dir_returns = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns' dir_stats = '/Users/jamz/Google Drive/MSc Bus. Finance, QTEM/4th Semester, BI/4th Sem Master Thesis /B. Dataanalyse/Strategy/Returns/Stats dataframes all' ''' dir_idx = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Raw\\Index' dir_clean = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Data Clean' dir_df = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns\\Plot code\\Dataframes' dir_returns = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. Dataanalyse\\Strategy\\Returns' dir_stats = 'C:\\Users\\ander\\Google Drive\\4th Sem Master Thesis_\\B. 
Dataanalyse\\Strategy\\Returns\\Stats dataframes all' ''' os.chdir(dir_idx) #import index data and rename columns index_nor = pd.read_csv('INDEX ME NORWAY.csv', sep=';', decimal=',', index_col=0) ;index_nor = index_nor.rename(columns={'OSLO EXCHANGE ALL SHARE - TOT RETURN IND':'OSEAX', 'OSLO EXCHANGE BENCHMARK - TOT RETURN IND':'OSEBX', 'MSCI NORWAY - TOT RETURN IND':'MSCI'}) index_swe = pd.read_csv('INDEX ME SWEDEN.csv', sep=';', decimal=',',index_col=0) ;index_swe = index_swe.rename(columns={'OMX STOCKHOLM 30 (OMXS30) - TOT RETURN IND':'OMXS30', 'OMX STOCKHOLM (OMXS) - TOT RETURN IND':'OMXS', 'MSCI SWEDEN - TOT RETURN IND':'MSCI'}) index_dk = pd.read_csv('INDEX ME DENMARK.csv', sep=';', decimal=',', index_col=0) ;index_dk = index_dk.rename(columns={'OMX COPENHAGEN (OMXC20) - TOT RETURN IND':'OMXC20', 'OMX COPENHAGEN (OMXC) - TOT RETURN IND':'OMXC', 'MSCI DENMARK - TOT RETURN IND':'MSCI'}) index_fin = pd.read_csv('INDEX ME FINLAND.csv', sep=';', decimal=',', index_col=0);index_fin = index_fin.rename(columns={'OMX HELSINKI (OMXH) - TOT RETURN IND':'OMXH', 'MSCI FINLAND - TOT RETURN IND':'MSCI'}) #convert index to datetime index_nor.index = pd.to_datetime(index_nor.index) index_swe.index = pd.to_datetime(index_swe.index) index_dk.index = pd.to_datetime(index_dk.index) index_fin.index = pd.to_datetime(index_fin.index) #Create variables with MCSI and total benchmark index msci_nor = index_nor.drop(columns=['OSEAX', 'OSEBX']) msci_swe = index_swe.drop(columns=['OMXS30', 'OMXS']) msci_dk = index_dk.drop(columns=['OMXC', 'OMXC20']) msci_fin = index_fin.drop(columns=['OMXH']) ### 10-year (Government bonds annual average) ### os.chdir(dir_clean) rfr = pd.read_csv('Tbill clean.csv', index_col=0); rfr.index = pd.to_datetime(rfr.index); ###### Transaction costs ###### trans_cost = 0.002 def TCstep(returns): for posi in returns: for count in range(1,len(returns[posi].index),12): month_ret = returns[posi][count] if month_ret > 0: returns[posi][count] = month_ret*(1-(trans_cost)) elif month_ret < 0: returns[posi][count] = month_ret*(1+(trans_cost)) return returns ####### Parameters ###### form_period = [1] holding_period = [1] ###### Save stats to ####### position = ['MSCI'] market = ['Norway', 'Sweden', 'Denmark', 'Finland'] stat = ['Mean (R)', 'CAGR', 'Volatility (%)','Sharpe', 'Sortino','Max DD (%)'] df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))), index=stat,columns=holding_period) df_stats = pd.DataFrame(pd.concat([df_stats]*len(form_period),axis=1, keys=form_period)) df_stats1 = pd.DataFrame(pd.concat([df_stats]*len(position),axis=1, keys=position)) stats = pd.DataFrame(pd.concat([df_stats1]*len(market),axis=0, keys=market)) stats.columns.names = ['Portfolio', 'Formation','Holding'] ###### Transaction costs ###### ####### iterate through the different formation periods #### for form in form_period: hold = 1 ####### iterate through the different formation periods #### def SimpleReturns(prices): ret_sim = prices/prices.shift(1) - 1 return ret_sim returns_nor = SimpleReturns(msci_nor.copy()) returns_swe = SimpleReturns(msci_swe.copy()) returns_dk = SimpleReturns(msci_dk.copy()) returns_fin = SimpleReturns(msci_fin.copy()) def ReturnStart(returns): returns = returns.loc['1991-03-29':'2019-12-31 '] returns.iloc[0] = np.nan return returns returns_nor = ReturnStart(returns_nor.copy()) returns_swe = ReturnStart(returns_swe.copy()) returns_dk = ReturnStart(returns_dk.copy()) returns_fin = ReturnStart(returns_fin.copy()) returns_nor = TCstep(returns_nor.copy()) returns_swe 
####### Parameters ######
form_period = [1]
holding_period = [1]

###### Stats dataframe to collect the results ######
position = ['MSCI']
market = ['Norway', 'Sweden', 'Denmark', 'Finland']
stat = ['Mean (R)', 'CAGR', 'Volatility (%)', 'Sharpe', 'Sortino', 'Max DD (%)']

# Placeholder values; every cell is overwritten in the loops below
df_stats = pd.DataFrame(np.random.random_sample((len(stat), len(holding_period))),
                        index=stat, columns=holding_period)
df_stats = pd.DataFrame(pd.concat([df_stats] * len(form_period), axis=1, keys=form_period))
df_stats1 = pd.DataFrame(pd.concat([df_stats] * len(position), axis=1, keys=position))
stats = pd.DataFrame(pd.concat([df_stats1] * len(market), axis=0, keys=market))
stats.columns.names = ['Portfolio', 'Formation', 'Holding']

####### Iterate through the different formation periods ####
for form in form_period:
    hold = 1

    def SimpleReturns(prices):
        ret_sim = prices / prices.shift(1) - 1
        return ret_sim

    returns_nor = SimpleReturns(msci_nor.copy())
    returns_swe = SimpleReturns(msci_swe.copy())
    returns_dk = SimpleReturns(msci_dk.copy())
    returns_fin = SimpleReturns(msci_fin.copy())

    def ReturnStart(returns):
        # Restrict to the sample period and blank the first observation
        returns = returns.loc['1991-03-29':'2019-12-31']
        returns.iloc[0] = np.nan
        return returns

    returns_nor = ReturnStart(returns_nor.copy())
    returns_swe = ReturnStart(returns_swe.copy())
    returns_dk = ReturnStart(returns_dk.copy())
    returns_fin = ReturnStart(returns_fin.copy())

    returns_nor = TCstep(returns_nor.copy())
    returns_swe = TCstep(returns_swe.copy())
    returns_dk = TCstep(returns_dk.copy())
    returns_fin = TCstep(returns_fin.copy())

    ### Excess returns (left disabled, as in the original) ###
    '''
    def ExcRet(returns, rfr):
        excess_returns = pd.DataFrame()
        excess_returns['MSCI'] = returns['MSCI'] - rfr.loc[returns.index[0]:returns.index[-1]]
        return excess_returns

    excret_nor = ExcRet(returns_nor.copy(), rfr['Norway'])
    excret_swe = ExcRet(returns_swe.copy(), rfr['Sweden'])
    excret_dk = ExcRet(returns_dk.copy(), rfr['Denmark'])
    excret_fin = ExcRet(returns_fin.copy(), rfr['US'])
    '''

    returns_nor_100 = returns_nor * 100
    returns_swe_100 = returns_swe * 100
    returns_dk_100 = returns_dk * 100
    returns_fin_100 = returns_fin * 100

    ### Cumulative returns ###
    def CumulativeReturns(simple_ret):
        cumret = (1 + simple_ret).cumprod()
        cumret.iloc[0] = 1
        return cumret

    cum_nor = CumulativeReturns(returns_nor.copy())
    cum_swe = CumulativeReturns(returns_swe.copy())
    cum_dk = CumulativeReturns(returns_dk.copy())
    cum_fin = CumulativeReturns(returns_fin.copy())

    ## Calculate the average annual return (annualised mean monthly return) ##
    def AnnMeanMonthReturn(returns):
        return returns.mean() * 12

    #### Calculate the compounded annual growth rate ####
    def CAGR(cumulative):
        return cumulative.iloc[-1] ** (12 / (len(cumulative) - 1)) - 1

    #### Calculate the annualised standard deviation of returns ####
    def AnnStd(returns):
        return returns.std() * np.sqrt(12)

    # Sharpe ratio (on raw returns, since the excess-return step above is disabled)
    def Sharpe(returns):
        return np.sqrt(12) * returns.mean() / returns.std()

    # Calculate Sortino ratio: same as Sharpe, but only accounting for downside risk
    def DownsideRet(returns):
        return returns.loc[returns < 0]

    def Sortino(returns, down_ret):
        return np.sqrt(12) * returns.mean() / down_ret.std()

    #### Maximum drawdown: rolling drawdown series; the minimum is extracted later ####
    def MaxDD(cumret):
        Roll_Max = cumret.cummax()
        Monthly_Drawdown = cumret / Roll_Max - 1.0
        return Monthly_Drawdown
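    # Illustrative sanity check (added sketch; the toy names _r, _toy, _cum are
    # hypothetical): a series compounding to exactly 2x over 12 monthly steps
    # should give a CAGR of 100% and zero drawdown on its monotone path.
    _r = 2 ** (1 / 12) - 1
    _toy = pd.Series([np.nan] + [_r] * 12)
    _cum = CumulativeReturns(_toy)
    assert abs(CAGR(_cum) - 1.0) < 1e-9    # doubling in one year -> CAGR = 1.0
    assert MaxDD(_cum).min() == 0.0        # no drawdown on a monotone path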
    # Collect the per-market frames so each statistic is computed once per market
    returns_map = {'Norway': returns_nor, 'Sweden': returns_swe,
                   'Denmark': returns_dk, 'Finland': returns_fin}
    cum_map = {'Norway': cum_nor, 'Sweden': cum_swe,
               'Denmark': cum_dk, 'Finland': cum_fin}

    for pos in position:
        for mkt in market:
            ret = returns_map[mkt][pos]
            cum = cum_map[mkt][pos]
            # Annual return
            stats.loc[(mkt, stat[0]), (pos, form, hold)] = round(AnnMeanMonthReturn(ret) * 100, 2)
            # Compound annual growth rate
            stats.loc[(mkt, stat[1]), (pos, form, hold)] = round(CAGR(cum) * 100, 2)
            # Annual standard deviation
            stats.loc[(mkt, stat[2]), (pos, form, hold)] = round(AnnStd(ret) * 100, 2)
            # Annual Sharpe ratio
            stats.loc[(mkt, stat[3]), (pos, form, hold)] = round(Sharpe(ret), 2)
            # Downside risk and Sortino ratio
            stats.loc[(mkt, stat[4]), (pos, form, hold)] = round(Sortino(ret, DownsideRet(ret)), 2)
            # Maximum drawdown: only extract the min value (the max DD)
            stats.loc[(mkt, stat[5]), (pos, form, hold)] = round(MaxDD(cum.copy()).min() * 100, 2)

    ##### Save dfs ####
    if form == 1 and hold == 1:
        os.chdir(dir_df)
        max_dd_rolling_nor = round(MaxDD(cum_nor.copy()) * 100, 2)
        max_dd_rolling_swe = round(MaxDD(cum_swe.copy()) * 100, 2)
        max_dd_rolling_dk = round(MaxDD(cum_dk.copy()) * 100, 2)
        max_dd_rolling_fin = round(MaxDD(cum_fin.copy()) * 100, 2)

        # Save stats computed with returns (Returns, Sharpe, Sortino, STD)
        df_list = [returns_nor, returns_swe, returns_dk, returns_fin]
        name_list = ['returns_{}_msci_F{}-H{}'.format(c, form, hold)
                     for c in ['nor', 'swe', 'dk', 'fin']]
        for df, name in zip(df_list, name_list):
            df.to_pickle(name + '.pkl')

        # Save stats computed with raw returns (Cum, MDD)
        df_list_raw = [cum_nor, cum_swe, cum_dk, cum_fin,
                       max_dd_rolling_nor, max_dd_rolling_swe,
                       max_dd_rolling_dk, max_dd_rolling_fin,
                       returns_nor, returns_swe, returns_dk, returns_fin]
        name_list_raw = (['cum_{}_msci_F{}-H{}'.format(c, form, hold)
                          for c in ['nor', 'swe', 'dk', 'fin']]
                         + ['max_dd_rolling_{}_msci_F{}-H{}'.format(c, form, hold)
                            for c in ['nor', 'swe', 'dk', 'fin']]
                         + ['returns_{}_msci_F{}-H{}'.format(c, form, hold)
                            for c in ['nor', 'swe', 'dk', 'fin']])
        for df, name in zip(df_list_raw, name_list_raw):
            df.to_pickle(name + '_raw.pkl')

    ######## Write the statistics to Excel #########
    os.chdir(dir_returns)
    writer = pd.ExcelWriter('Statistics MSCI.xlsx', engine='xlsxwriter')
    for pos in position:
        stats[pos].to_excel(writer, sheet_name='{}'.format(pos))
        workbook = writer.book           # handles kept for optional formatting (unused here)
        worksheet = writer.sheets[pos]
    writer.save()
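    # Note (added): ExcelWriter.save() is deprecated on newer pandas versions and
    # removed in pandas 2.0; there the equivalent is a context manager, e.g.:
    # with pd.ExcelWriter('Statistics MSCI.xlsx', engine='xlsxwriter') as writer:
    #     for pos in position:
    #         stats[pos].to_excel(writer, sheet_name=pos)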
    ##### LaTeX code #####
    os.chdir(dir_stats)
    stats.to_pickle('stats_msci.pkl')
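    # Hedged sketch (added): the pickled stats are presumably rendered as a LaTeX
    # table elsewhere; under that assumption, a minimal version could be:
    # stats_loaded = pd.read_pickle('stats_msci.pkl')
    # print(stats_loaded.to_latex(multirow=True))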