Hi all,
I have run into the following issue while preparing data to be fed into an LSTM-type network. Although timedelta and relativedelta are very handy, they take a bit of time to compute; that is not the main issue, however. I will walk you through it: first a very quick copy of the code, and then I will point out where the issue is.
# Read in price data and use the parsed date as the index.
Prices = pd.read_csv("USDprices.csv")
Prices['Date'] = pd.to_datetime(Prices['Date'], format='%Y-%m-%d')
Prices.index = Prices['Date']

# Read in sentiment data, keeping only the News rows and the marketRisk column.
Sent = pd.read_csv("USDSentiment.csv")
# .ffill() replaces fillna(method="ffill"), which is deprecated in recent pandas.
Sent = Sent[Sent.dataType == 'News'][['Date', 'marketRisk']].ffill()
Sent['Date'] = pd.to_datetime(Sent['Date'], format='%Y-%m-%d')
Sent.index = Sent['Date']

# Combine the price and sentiment tables on their date index (inner join).
Ana = Prices.merge(Sent, left_index=True, right_index=True, how='inner')
# Both inputs still carry a 'Date' column, so the merge suffixes them as
# Date_x / Date_y; they are dropped here along with the unused price columns.
Ana = Ana.drop(columns=['Unnamed: 0', 'Volume', 'Asset', 'Date_x', 'Date_y',
                        'Open', 'High', 'Low', 'UnadjClose'])

# returns: fractional change of the closing price over the previous time step.
previous_close = Ana['Close'].shift(1)
Ana['returns'] = (Ana['Close'] - previous_close) / previous_close

# Target: closing-price difference between the next time step and this one.
Ana['Target'] = Ana["Close"].diff(1).shift(-1)

# Change: the 'returns' value of the next time step.
Ana['Change'] = Ana["returns"].shift(-1)
Ana.head()
# So far so good. Below I continue by adding a label via a loop.
## Label each time step False (no-buy) or True (buy) based on whether the
## summed returns over the next `timestep` steps are positive.
timestep = 1

# Work on a plain NumPy array so the look-ahead is purely positional; using
# integer keys with Series[...] on a date-indexed Series is deprecated.
returns_values = Ana["returns"].to_numpy()

# One label for every row that has a full `timestep`-step look-ahead window.
labels = [
    np.sum(returns_values[i + 1:i + timestep + 1]) > 0
    for i in range(len(returns_values) - timestep)
]
print(len(labels))

# Price table with labels as Signal.
# Trimming to len(labels) drops the trailing rows that have no label, and
# (unlike iloc[:-timestep]) still keeps every row when timestep == 0.
Ana = Ana.iloc[:len(labels), :].copy()
Ana['Signal'] = labels
Ana.head()
# Next, I add some extra features.
# Feature engineering
Ana["Close30"] = Ana["Close"].rolling(30).mean()
Ana["Close100"] = Ana["Close"].rolling(100).mean()
Ana["r1"] = Ana["Close"].diff(1)
Ana["marketrisk_avg7"] = Ana["marketRisk"].rolling(7).mean()

# Drop NA rows (axis passed by keyword; the positional form dropna(0) is not
# accepted by pandas 2.x).
Ana = Ana.dropna(axis=0)
Ana.head()


# The author describes the following function as the step used to divide the
# original dataset into training, validating and testing examples.
def series_to_supervised(df, n_in=1, n_out=1):
    """Build a frame of lagged copies of every column of *df*.

    Args:
        df: A pandas DataFrame; a plain list is wrapped in a DataFrame first.
        n_in: Number of past steps to include. Lags n_in, n_in - 1, ..., 0
            are emitted, in that order.
        n_out: Unused; kept so existing calls keep working.

    Returns:
        A DataFrame with one column per (lag, original column) pair, named
        "<column>(t-<lag>)". Shifted values that fall before the start of
        the data are NaN, so callers typically drop those rows.
    """
    frame = pd.DataFrame(df) if isinstance(df, list) else df
    cols, names = [], []
    # Input sequence (t-n, ..., t-1, t-0).
    for lag in range(n_in, -1, -1):
        cols.append(frame.shift(lag))
        names += ['%s(t-%d)' % (col, lag) for col in frame.columns]
    # Put it all together.
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    return agg


# Features selection
Set = Ana[['r1', 'marketrisk_avg7', 'Close30', 'Close100']]
FEATURES_SHAPE = Set.shape[1]
print(FEATURES_SHAPE)

# Forming examples: each row holds SEQ_LEN consecutive steps of every feature.
SEQ_LEN = 5
Set = series_to_supervised(Set, SEQ_LEN - 1, 0)
Set[["Change", "Signal"]] = Ana[["Change", "Signal"]]
Set = Set.dropna()
print(Set.shape)
Set.head()
SetAdjusted = Set.copy()

# Due to space limitations I will continue via email. I can't explain
# everything well here, so I will be sending you an email including a copy of
# the code above.
# Best regards