Using Machine & Deep Learning for Algorithmic FX Trading
Dr Yves J Hilpisch | The AI Machine
import math
import tpqoa
import cufflinks
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn')
%matplotlib inline
cufflinks.set_config_file(offline=True)
import warnings
warnings.simplefilter('ignore')
Why Oanda and why FX?
api = tpqoa.tpqoa('dyjh.cfg')  # connect with credentials from the config file
api.get_instruments()[:4]
sym = 'EUR_USD'
raw_a = api.get_history(sym, '2019-02-04', '2019-02-06', 'M1', 'A')  # ask prices
raw_b = api.get_history(sym, '2019-02-04', '2019-02-06', 'M1', 'B')  # bid prices
raw_a.info()
sel = list('c')  # shorthand for ['c'], the close price column
spread = (raw_a['c'] - raw_b['c']).mean()
spread # average spread
data = ((raw_a[sel] + raw_b[sel]) / 2)  # mid close prices
ptc = spread / data['c'].mean()
ptc # mean spread relative to mean mid price
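As a hedged aside (not in the original notebook), the same quantities can be expressed in pips and basis points, assuming 1 pip = 0.0001 for EUR_USD:
# illustrative only: average spread in pips, proportional cost in bps
print('spread: {:.2f} pips | cost: {:.2f} bps'.format(
    spread / 0.0001, ptc * 1e4))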
data.head()
data['c'].plot();
lags = 7
cols = []
for lag in range(1, lags + 1):
    col = 'lag_{}'.format(lag)
    data[col] = data['c'].shift(lag)  # lagged prices
    cols.append(col)
data.dropna(inplace=True)
reg = np.linalg.lstsq(data[cols], data['c'], rcond=-1)[0]
np.set_printoptions(precision=4)
reg
pd.DataFrame(reg, index=cols).plot(kind='bar');
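As a quick sanity check (a sketch, not part of the original analysis), the fitted weights can reproduce the in-sample prediction; for price levels the weight on lag_1 dominates, in line with random-walk behavior:
# illustrative in-sample prediction from the OLS weights
data['pred'] = np.dot(data[cols], reg)
data[['c', 'pred']].iloc[:100].plot();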
Investopedia writes:
Chart patterns look at the big picture and help to identify trading signals — or signs of future price movements.
The theory behind chart patterns is based on this assumption — that certain patterns consistently reappear and tend to produce the same outcomes.
The process of identifying chart patterns based on these criteria can be subjective in nature, which is why charting is often seen as more of an art than a science.
data['r'] = np.log(data['c'] / data['c'].shift(1))
cols = []
for lag in range(1, lags + 1):
    col = 'lag_{}'.format(lag)
    data[col] = data['r'].shift(lag)  # lagged returns
    cols.append(col)
data.dropna(inplace=True)
data[cols] = np.where(data[cols] > 0, 1, -1)
data[cols] = data[cols].astype(int)
data.head(5)
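With 7 binary lags there are at most 2 ** 7 = 128 distinct patterns; a quick check, added here for illustration, of how many actually occur in the sample:
# observed vs. theoretically possible binary patterns (illustrative)
print(len(data[cols].drop_duplicates()), 'of', 2 ** lags, 'possible patterns')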
data['d'] = np.sign(data['r']).astype(int)  # market direction: +1 up, -1 down (0 if flat)
data.groupby(cols[:2])['d'].count()
data.groupby(cols[:2] + ['d'])['r'].count()
(data.groupby(cols[:2] + ['d'])['r'].count() / len(data) * 100).round(2)
cols[:3] + ['d']
grouped = data[cols[:3] + ['d']].groupby(cols[:3] + ['d'])
res = grouped['d'].size().unstack()
res
res['prob_up'] = (res[1] / (res[1] + res[-1])).round(3)
res['prob_down'] = 1 - res['prob_up']
res
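A minimal, hypothetical way to turn these frequencies into a trading signal is to go long whenever a pattern historically resolved upwards more often than not (prob_up as defined above):
# hypothetical frequency-based signal: +1 if upward moves dominated
res['signal'] = np.where(res['prob_up'] > 0.5, 1, -1)
res['signal']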
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver='lbfgs', multi_class='auto')
lr.fit(data[cols], data['d'])
y_lr = lr.predict(data[cols])
accuracy_score(y_lr, data['d'])
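For context (a hedged aside), the accuracy numbers are best read against the relative frequency of the majority class:
# baseline: relative frequency of each direction class
data['d'].value_counts(normalize=True)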
nb = GaussianNB()
nb.fit(data[cols], data['d'])
y_nb = nb.predict(data[cols])
accuracy_score(y_nb, data['d'])
kernels = ['linear', 'rbf', 'poly']
models = {}
for kernel in kernels:
    svm = SVC(C=5, kernel=kernel, gamma='auto')
    svm.fit(data[cols], data['d'])
    y_svm = svm.predict(data[cols])
    acc = accuracy_score(y_svm, data['d'])
    print('kernel: {:8s} | accuracy: {:6.3f}'.format(kernel, acc))
    models[kernel] = svm
models
dnn = MLPClassifier(hidden_layer_sizes=3 * [96], activation='relu',
                    max_iter=2500, verbose=False)
%time dnn.fit(data[cols], data['d'])
y_dnn = dnn.predict(data[cols])
accuracy_score(y_dnn, data['d'])
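All accuracy numbers above are in-sample. A hedged sketch of an out-of-sample check, using a simple chronological split not in the original, might look like this:
# illustrative chronological train/test split (70/30)
split = int(len(data) * 0.7)
train, test = data.iloc[:split], data.iloc[split:]
model = MLPClassifier(hidden_layer_sizes=3 * [96], activation='relu',
                      max_iter=2500)
model.fit(train[cols], train['d'])
accuracy_score(test['d'], model.predict(test[cols]))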
NO TRANSACTION COSTS | ONLY IN-SAMPLE PERFORMANCE
data['p'] = models['rbf'].predict(data[cols])  # SVM (rbf) position predictions
data['s_svm'] = data['r'] * data['p']  # vectorized SVM strategy returns
(data['p'].diff() != 0).sum()  # number of position changes (trades)
data['p'] = dnn.predict(data[cols])
data['s_dnn'] = data['r'] * data['p']
(data['p'].diff() != 0).sum()
data[['s_svm', 's_dnn', 'r']].cumsum().apply(np.exp).plot();
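Since the slide above flags the absence of transaction costs, here is a hedged sketch that subtracts the proportional cost ptc derived earlier whenever the DNN position changes; an approximation, not the original analysis:
# approximate proportional transaction costs on every position change
trades = data['p'].diff().fillna(0) != 0
data['s_dnn_tc'] = data['s_dnn'] - np.where(trades, ptc, 0)
data[['s_dnn', 's_dnn_tc', 'r']].cumsum().apply(np.exp).plot();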