Hello! Hope everyone's development code is running smoothly. I am currently trying to bridge a basic reinforcement learning algorithm with the Binance exchange API so I can paper trade it. The RL algos I've posted are very basic, have very little feature engineering, and are readily available on the internet, so I don't feel I'm giving away anything too deep from the RL trading playbook. I'm hoping to find someone who can help me on my journey of bridging these historical backtesting RL algos to an actual live paper trading account on Binance (there's a rough connection sketch just after the postscripts). I'll keep working on it on the side, and if I find anything before a solution is posted here, I'll gladly share it back with all of you. Thanks in advance, and feel free to tackle just one algo or both!
P.S.: The scope of the code is rather large, so I'm not looking for a solution overnight. I understand this may take some serious time, and I'm willing to check back on this post periodically over the year to add things I've learned and to see if anybody else has come up with creative solutions.
P.P.S.: In my personal opinion, RL 2 is the best algo overall, in that it can both go long and short an equity. However, since you cannot short spot crypto except through a separate futures market, I understand if you'd rather cut out the shorting code; it isn't necessary if you are trading crypto directly.
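To get the ball rolling on the bridging side, here is a minimal sketch of how I imagine the connection to the Binance Spot Testnet (their official paper trading environment) could look, assuming the python-binance package. The API keys are placeholders you'd generate at testnet.binance.vision, BTCUSDT is just an example pair, and place_order_from_action is a hypothetical helper I made up to map RL 1's discrete actions onto orders; none of this is a tested end-to-end bridge yet.

# Rough Binance Spot Testnet sketch (assumes: pip install python-binance)
# Keys come from https://testnet.binance.vision and are separate from live account keys
from binance.client import Client

API_KEY = "YOUR_TESTNET_API_KEY"        # placeholder
API_SECRET = "YOUR_TESTNET_API_SECRET"  # placeholder

# testnet=True routes every call to the Spot Testnet, so no real funds are involved
client = Client(API_KEY, API_SECRET, testnet=True)

# Latest price for the pair the agent would trade (example pair only)
ticker = client.get_symbol_ticker(symbol="BTCUSDT")
current_price = float(ticker["price"])

# Hypothetical helper: map RL 1's discrete actions onto testnet market orders,
# assuming 0 = long, 1 = hold, 2 = close as in the env's action space comment
def place_order_from_action(action, quantity):
    if action == 0:
        return client.order_market_buy(symbol="BTCUSDT", quantity=quantity)
    elif action == 2:
        return client.order_market_sell(symbol="BTCUSDT", quantity=quantity)
    return None  # hold: do nothing

One thing I already know to watch for: quantities still have to respect the symbol's LOT_SIZE filter on the testnet, so the raw trading_allowance / current_price quantity from the env will need rounding before the exchange accepts an order.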
RL 1:
import numpy as np
import pandas as pd
# create timesteps
time = np.arange(0, 50, 0.1)
# Assign amplitude and normalise above 0
amplitude = np.sin(time)
amplitude = amplitude + 1
max_amp = max(amplitude)
amplitude = amplitude / max_amp
# Construct Dataframe
df = pd.DataFrame(amplitude)
df.columns = ["Close"]
df["Close_Rt"] = df ["Close"].pct_change()
df = df.replace(np.inf, np.nan)
df = df.dropna()
df = df.reset_index(drop=True)
# Show dataframe and values
print(f"Length: {len(df)}")
print("Min Close: ", df["Close"].min())
print("Max Close: ", df["Close"].max())
df.head(2)
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (15,3)
df["Close"].plot()
# ENVIRONMENT SETUP AND CLASS
import gym
from gym import spaces  # use gym 0.24.0 (pip install gym==0.24.0)
import numpy as np
import random
import torch
# Initialize Variables
MAX_INT = 2147483647
MAX_OPEN_POSITIONS = 1
INITIAL_ACCOUNT_BALANCE = 1000
PERCENT_CAPITAL = 0.1
TRADING_COSTS_RATE = 0.001
KILL_THRESH = 0.4  # Terminate if balance drops too low
# Build Environment Class
class StockTradingEnv(gym.Env):
    """A stock trading environment built on OpenAI Gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(StockTradingEnv, self).__init__()
        # Generic variables
        self.df = df
        # Account variables
        self.account_balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.realized_profit = 0
        self.unrealized_profit = 0
        self.last_profit = 0
        # Position Variables
        self.open_quantities = []
        self.open_prices = []
        self.trading_costs = 0
        self.open_positions = 0
        self.closed_positions = 0
        self.incorrect_position_calls = 0
        self.num_trades = 0
        self.held_for_period = 0
        # Current Step
        self.current_step = 0
        self.max_steps = len(df)
        # Actions of the format Long, Hold and Close
        self.action_space = spaces.Discrete(3)
        # Setup observation Space
        self.observation_space = spaces.Box(low=-1, high=1, shape=(8,), dtype=np.float32)
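        # Observation vector (8 features): current close, current return, the four
        # previous returns, an open-position flag, and num_trades scaled by data length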
    # Reward Structure
    def calculate_reward(self):
        reward = 0
        if self.num_trades:
            reward += self.realized_profit / self.num_trades
            reward += self.unrealized_profit / self.num_trades * 0.3
        reward += 1 if self.last_profit > 0 else 0
        reward -= 2 if self.incorrect_position_calls > 0 else 0
        if reward <= 0:
            reward -= 2
        return reward

    # Observation Structure
    def _next_observation(self):
        close_item = self.df.loc[self.current_step, "Close"].item()
        close_rt_item = self.df.loc[self.current_step, "Close_Rt"].item()
        close_T1_item = self.df.loc[self.current_step - 1, "Close_Rt"].item()
        close_T2_item = self.df.loc[self.current_step - 2, "Close_Rt"].item()
        close_T3_item = self.df.loc[self.current_step - 3, "Close_Rt"].item()
        close_T4_item = self.df.loc[self.current_step - 4, "Close_Rt"].item()
        current_position = 1 if self.open_positions else 0
        num_trades = self.num_trades / len(self.df) if self.num_trades > 0 else 0
        obs = np.array([close_item, close_rt_item, close_T1_item, close_T2_item,
                        close_T3_item, close_T4_item, current_position, num_trades])
        return obs
    # Calculate the open positions value
    def _calculate_open_value(self):
        open_trades_value = 0
        counts = 0
        for qty in self.open_quantities:
            acquisition_price = self.open_prices[counts]
            open_trades_value += acquisition_price * qty
            counts += 1
        return open_trades_value

    # Calculate net profit
    def _profit_calculation(self, current_price, calc_type):
        open_trades_value = self._calculate_open_value()
        total_quantity_held = sum(self.open_quantities)
        current_value = total_quantity_held * current_price
        gross_profit = current_value - open_trades_value
        if calc_type == "close_position":
            trading_costs = current_value * TRADING_COSTS_RATE
            self.trading_costs += trading_costs
        elif calc_type == "hold_position" or calc_type == "open_position":
            trading_costs = open_trades_value * TRADING_COSTS_RATE
        net_profit = gross_profit - trading_costs
        return net_profit
    # Action Management
    def _take_action(self, action):
        current_price = self.df.loc[self.current_step, "Close"].item()
        # Reset last profit
        self.last_profit = 0
        self.incorrect_position_calls = 0
        # Go Long
        if action == 0:
            if self.open_positions < MAX_OPEN_POSITIONS:
                net_profit = self._profit_calculation(current_price, "open_position")
                net_worth = self.net_worth + net_profit
                trading_allowance = net_worth * PERCENT_CAPITAL
                self.open_quantities.append(trading_allowance / current_price)
                self.open_prices.append(current_price)
                self.trading_costs += trading_allowance * TRADING_COSTS_RATE
                self.num_trades += 1