Dr. Yves J. Hilpisch | The Python Quants GmbH | http://tpq.io
import numpy as np
import pandas as pd
from pylab import plt, mpl
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# later releases removed the old names; prefer the new name when it exists
# and fall back to the legacy one on older installations.
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)
# Render figures at high resolution.
mpl.rcParams['figure.dpi'] = 300
%config InlineBackend.figure_format = 'jpg'
# Data set originates from the Refinitiv Workspace/Eikon data API.
url = 'https://certificate.tpq.io/mlfin.csv'
# The first CSV column becomes the index and is parsed as dates.
raw = pd.read_csv(url, parse_dates=True, index_col=0)
# Show which columns are available.
raw.columns
Index(['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N', '.SPX', '.VIX', 'SPY', 'EUR=', 'XAU=', 'GDX', 'GLD', 'BTC='], dtype='object')
# Remove the 'BTC=' column in place, then drop every row that still
# contains a missing value.
raw.drop(columns='BTC=', inplace=True)
raw.dropna(inplace=True)
# Column to analyse; swap the assignment below to study another one.
sym = '.SPX'
# sym = 'AAPL.O'
# Log returns for every column; rows with missing values (at least the
# first one, which has no predecessor) are dropped.
rets = np.log(raw.div(raw.shift(1))).dropna()
mean = rets.loc[:, sym].mean()
mean
0.0003947829170345228
std = rets.loc[:, sym].std()
std
0.011018002078001089
# Thresholds two standard deviations above and below the mean return.
tail_right = mean + std * 2
tail_right
0.022430787073036702
tail_left = mean - std * 2
tail_left
-0.021641221238967653
# Count the returns beyond the two-standard-deviation thresholds.
# The comparison is applied to the series as-is: sorting it first only
# reorders the boolean mask, which pandas then has to realign to the
# index before selecting, without changing the count.
# int() keeps the counts plain Python integers, as len() returned before.
nor = int((rets[sym] > tail_right).sum())
nor # number of large positive returns
45
nol = int((rets[sym] < tail_left).sum())
nol # number of large negative returns
72
# Gross performance over time: exponentiated cumulative log returns.
np.exp(rets[sym].cumsum()).plot();
# The same compounding with the returns ordered from smallest to largest.
np.exp(np.cumsum(rets[sym].sort_values().values))
array([0.8801595 , 0.79644517, 0.73593948, ..., 2.25957578, 2.46942528, 2.70112567])
plt.bar(
    np.arange(len(rets)),
    np.exp(np.cumsum(rets[sym].sort_values().values)),
);
# Red lines mark where the large negative returns end and where the
# large positive returns begin in the sorted sequence.
plt.axvline(nol, c='r', lw=1)
plt.axvline(len(rets) - nor, c='r', lw=1);
Gross performance when missing the x largest positive returns as a long investor.
# Numbers of extreme returns to leave out: 0, 5, ..., 50.
x_ = range(0, 51, 5)
# gross performance when missing the x largest positive returns
# The sorted cumulative performance does not depend on x, so it is
# computed once instead of re-sorting the data on every iteration.
sorted_perf = np.exp(rets[sym].sort_values().values.cumsum())
r_ = []
for x in x_:
    # Stepping x positions back from the end omits the x largest returns.
    r = sorted_perf[-1 - x]
    r_.append(r)
    print(f'{x:3d} | {r:.3f}')
0 | 2.701 5 | 1.875 10 | 1.503 15 | 1.279 20 | 1.108 25 | 0.972 30 | 0.857 35 | 0.758 40 | 0.673 45 | 0.600 50 | 0.538
# Bar chart of the resulting gross performance for each x; every bar is
# annotated slightly above its top with the value rounded to one decimal.
plt.bar(x_, r_)
for skipped, perf in zip(x_, r_):
    annotation = str(round(perf, 1))
    plt.text(skipped - 1, perf + 0.025, annotation)
Gross performance when avoiding the x largest negative returns as a long investor.
# gross performance when avoiding the x largest negative returns
# The ascending sort does not depend on x, so it is done once instead
# of on every iteration.
sorted_rets = rets[sym].sort_values().values
r_ = []
for x in x_:
    # Skipping the first x entries removes the x most negative returns.
    r = np.exp(sorted_rets[x:].sum())
    r_.append(r)
    print(f'{x:3d} | {r:.3f}')
0 | 2.701 5 | 4.113 10 | 5.172 15 | 6.324 20 | 7.582 25 | 8.992 30 | 10.576 35 | 12.314 40 | 14.250 45 | 16.402 50 | 18.679
# Bar chart of the resulting gross performance for each x; every bar is
# annotated slightly above its top with the value rounded to one decimal.
plt.bar(x_, r_)
for skipped, perf in zip(x_, r_):
    annotation = str(round(perf, 1))
    plt.text(skipped - 1, perf + 0.2, annotation)