SnP - ESc1
Dow - YMc1
Bund - FGBLc1
10 Year t-note - ZNc1
BTP - FBTMc1
FOAT - FOATc1
UK Gilts - Not Available
US 10 year - TYc1
CGB - CGBc1
Australia 10 Year Bill - YTCc1
Gold - GCv1
Silver - SIv1
Copper - HGv1
RBOB - RBc1
GO - LGOc1
BRENT - LCOc1
WTI - CLc1
Brent - WTI - Not Available
import warnings
warnings.filterwarnings('ignore')
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()
from datetime import datetime
readRDS = robjects.r['readRDS']
#df = readRDS('CLc1.rds')
df = readRDS('ESc1.rds')
#df = pandas2ri.ri2py(df)
# do something with the dataframe
df_min=df
%matplotlib inline
#df=df.set_index('Timestamp')
#df.index=df.index.tz_localize(None)
df_min=df
print(df.head())
df['Last'].plot()
df=df.dropna()
print(df.info())
import seaborn as sns
sns.heatmap(df.isnull(), cbar=False)
import matplotlib.pyplot as plt
%matplotlib inline
df=df.set_index('Timestamp')
df.index=df.index.tz_localize(None)
df_min=df
print(df.head())
plt.title("Close price of SnP")
df['Last'].plot()
plt.show()
Resample the minute-level data into a daily data frame.
# Per-column rules used to collapse the intraday bars into one bar per day.
ohlc_dict = {
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Last': 'last',
    'Volume': 'sum',
}
# Resampler.agg is the documented entry point for a column -> function mapping.
df_daily = df.resample('1D').agg(ohlc_dict)
# Remove daily rows that ended up with a missing value in any column.
df_daily = df_daily.dropna()
# info() prints its own report and returns None; print() around it only
# produced an extra "None" line.
df_daily.info()
import seaborn as sns
# Drawn after dropna(), so this map is expected to show no missing cells.
sns.heatmap(df_daily.isnull(), cbar=False)
#df_daily['2014-05':'2015-05']['High'].plot()
# Ad-hoc inspection slices (only displayed when last in a notebook cell).
df_daily['2014-05-31':'2015-05-04']
print(len(df['2014-06-30':'2014-07-01']))
df['2014-05-31':'2014-06-01']
# The analysis below works with a 'Close' column; it mirrors 'Last'.
df_daily['Close'] = df_daily['Last']
# NOTE: this is an alias, not a copy. Columns added to `df` also appear on
# `df_daily` until `df` is rebound to a new frame.
df = df_daily
print('The total no of days is', len(df))
Here we extend the data frame with additional analytical columns such as returns, the open-to-high (O-H) range, previous close to current high, gap-up/gap-down, etc.
import numpy as np
import pandas as pd

# Derived daily columns; every one of them is expressed in percent.

# Open-to-close return measured against the open (the starting value), the
# same "divide by the base" convention used by the gap and O-H columns below.
# The previous version divided by the close, which understates gains and
# overstates losses.
df['Returns'] = ((df['Close'] - df['Open']) / df['Open']) * 100
#df['Returns']=
#df['Returns']=((df['Close']-df['Close'].shift(1))/df['Close'].shift(1))*100
df['Prev_Close'] = df['Close'].shift(1)
# Close-to-close logarithmic return.
df['Log_Returns'] = np.log(df['Close'] / df['Prev_Close']) * 100
# How far the day's high is from the previous close.
df['PrevClose-Current_High'] = ((df['High'] - df['Prev_Close']) / df['Prev_Close']) * 100
# Overnight gap: today's open against the previous close.
df['Gap-up/down'] = ((df['Open'] - df['Prev_Close']) / df['Prev_Close']) * 100
# Full intraday range, relative to the high.
df['Max_Swings'] = ((df['High'] - df['Low']) / df['High']) * 100
# Move from the open up to the day's high.
df['O-H'] = ((df['High'] - df['Open']) / df['Open']) * 100
#print(df.head())
# shift(1) leaves the first row without a previous close; drop such rows.
df = df.dropna()
print(df.head())
# Use the fully qualified option name: the bare 'precision' pattern is
# ambiguous on pandas versions that also define 'styler.format.precision'.
pd.set_option('display.precision', 3)
df.describe()
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import plotly.express as px

sns.set(color_codes=True)
plt.figure(figsize=(12,8))
sns.distplot(df['Log_Returns'],bins=20)

# Calendar breakdown of the index, used for grouping and plot facets below.
df['year'] = df.index.year
df['month'] = df.index.month
df['day'] = df.index.day
# DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0. Use
# isocalendar() where it exists and fall back to .week on older versions.
# to_numpy() assigns by position, exactly like the old index attribute did.
if hasattr(df.index, 'isocalendar'):
    df['week'] = df.index.isocalendar()['week'].astype('int64').to_numpy()
else:
    df['week'] = df.index.week
df = df.dropna()
print(min(df['Returns']))

# One facet per month, animated over the years; marker size follows volume.
fig = px.scatter(df, x="day", y="Returns", animation_frame="year", animation_group="day",
                 size="Volume", color="month", hover_name="year", facet_col="month",
                 log_x=False, size_max=45, range_x=[0,35], range_y=[-50,10], width=1800, height=400)
fig.show()
fig = px.histogram(df, x='Log_Returns',nbins=20,title='Logarithmic Returns Distribution')
fig.show()
fig = px.histogram(df, x="Max_Swings",nbins=50,title='Max Swing Distribution')
fig.show()
sns.distplot(df['Max_Swings'],bins=20)
#a=df[df['Returns']>-30]
#df2=a
#print(df2)
#fig = px.histogram(df, x="Returns",nbins=50,histnorm='probability')
#fig.show()
#fig = px.histogram(df, x="R",nbins=100)
#fig = px.histogram(df, x="Returns",nbins=50)
#fig.show()
#plt.figure(figsize=(12,8))
#sns.distplot(df['Returns'],bins=40,color='g')
#from scipy.stats import norm
#plt.figure(figsize=(12,8))
#plt.hist(df['Returns'], bins=40, color='g');
#plt.plot(df['Returns'], norm.pdf(df['Returns']),hist)
# Distribution of the 'Returns' column (seaborn histogram with density curve).
plt.figure(figsize=(12, 8))
returns_ax = sns.distplot(df['Returns'], bins=40, color='g')
returns_ax.set(title='Returns Distribution', ylabel='Frequency', xlabel="Returns")
plt.show()

# Open-to-high range as an interactive histogram, normalised to probabilities.
fig = px.histogram(
    df,
    x="O-H",
    nbins=50,
    title="Open High Range Distribution",
    histnorm='probability',
)
fig.show()
#from scipy.stats import norm
#plt.figure(figsize=(12,8))
#plt.hist(df['O-H'], bins=40, color='g');
##plt.plot(df['Returns'], norm.pdf(df['Returns']),hist)
#plt.gca().set(title='Returns Distribution', ylabel='Frequency',xlabel="Open-High Range");
#plt.xlim(-6,6,0.5)
#plt.xticks()
#plt.show()
#plt.figure(figsize=(12,8))
#sns.distplot(df['O-H'],bins=40,color='g')
#plt.gca().set(title='Open High Ranging', ylabel='Frequency',xlabel="Returns");
#plt.show()
# Days on which the open-to-high move ('O-H', in percent) exceeded 4.
s = df[df['O-H'] > 4]
print(len(s))
# Share of such days, in percent. `s` is a subset of `df`, so `df` is the
# matching denominator; `df_daily` may still hold rows that were dropped from
# `df`, which skewed the ratio.
# NOTE(review): the name says "3" but the threshold above is 4. The name is
# kept because other cells may refer to it; confirm which value is intended.
probab_greater_than_3 = (len(s) / len(df)) * 100
probab_greater_than_3
fig = px.histogram(df, x="PrevClose-Current_High",nbins=50,title="Previous Close to Current Day High")
fig.show()
#plt.figure(figsize=(12,8))
#sns.distplot(df['PrevClose-Current_High'],bins=40,color='g',kde=False,hist=True)
#plt.gca().set(title='PrevClose-Current_High Range');
#plt.show()
#fig = px.scatter(df,y="Gap-up/down",title="Gap Up/Gap Down Distribution")
#fig.show()
#df1=pd.DataFrame()
#df_1=df[df["Gap-up/down"]>-2]
#print(df["Gap-up/down"].count())
#print(df_1)
#plt.figure(figsize=(16,10))
#sns.scatterplot(df["Gap-up/down"])
#sns.scatterplot(y=df["Gap-up/down"],x=df["Returns"],color='g')
#plt.xlim(df.index[0],df.index[-1])
#plt.xlim(df.index[0],df.index[-1])
#plt.gca().set(title="Gap-up/down distribution");
#plt.ylim(0,1)
#plt.scatter(df["Gap-up/down"])
#plt.plot(df["Returns"])
#plt.legend()
#plt.plot(df["Returns"]/max(abs(df["Returns"])))
#plt.plot(df["Gap-up/down"]/max(abs(df["Gap-up/down"])))
#sns.pairplot(data=df)
#plt.plot()
#plt.show()
#import scipy
#a=scipy.stats.pearsonr(df["Returns"],df["Max_Swings"] )
#print(a)
#plt.hist(df["Gap-up/down"])
# Mean of every column per day of the week (pandas numbering: 0 = Monday),
# followed by a scatter of the mean traded volume.
plt.figure(figsize=(12, 8))
df['weekday'] = df.index.dayofweek
df_weekday = df.groupby('weekday').mean()
#df_weekday['Volume'].plot()
#a['Close'].plot()
#fig = px.scatter(df_weekday,y="Volume",title="Volume Distribution per day")
sns.set(color_codes=True)
weekday_volume = df_weekday["Volume"]
sns.scatterplot(
    x=weekday_volume.index,
    y=weekday_volume,
    color='r',
    marker='^',
    s=500,
)
plt.show()
#fig.show()
As the international market is open from Monday to Saturday, we can see that a significantly reduced volume is traded on Monday and the lowest on Saturday. Volume from Tuesday to Friday is at similar levels.
import pandas as pd
import matplotlib.pyplot as plt
import monthly_returns_heatmap as mrh

# Ensure the index is datetime-typed before building the monthly heatmap.
df.index = pd.to_datetime(df.index)

# Close-to-close simple returns feed the heatmap.
prices = df['Close']
returns = prices.pct_change()

plt.figure(figsize=(20, 10))
#returns.plot_monthly_returns_heatmap()
mrh.plot(returns)  # direct call instead of the Series method shown above
import seaborn as sns

# Joint distribution of the daily return against the intraday range.
sns.set(style="ticks")
#a=df[df['Returns']>-30]
#df2=a[a['Max_Swings']<50]
#rs = np.random.RandomState(11)
x = df['Returns']
y = df['Max_Swings']
#sns.jointplot(x, y, kind="hex", color="#4CB391")
sns.jointplot(x=x, y=y, data=df)
# Calendar breakdown of the index, used for the plot facets below.
df['year'] = df.index.year
df['month'] = df.index.month
df['day'] = df.index.day
# DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0. Use
# isocalendar() where it exists and fall back to .week on older versions.
# to_numpy() assigns by position, exactly like the old index attribute did.
if hasattr(df.index, 'isocalendar'):
    df['week'] = df.index.isocalendar()['week'].astype('int64').to_numpy()
else:
    df['week'] = df.index.week
df = df.dropna()
import plotly.express as px
# One facet per month, animated over the years; marker size follows volume.
fig = px.scatter(df, x="day", y="Returns", animation_frame="year", animation_group="day",
                 size="Volume", color="month", hover_name="year", facet_col="month",
                 log_x=False, size_max=45, range_x=[0,35], range_y=[-10,10], width=1800, height=400)
fig.show()
import pandas as pd
from collections import Counter
from tqdm import tqdm  # no longer used in this cell; import kept for later cells

# Hour of the day at which each session's high is made.
df2 = df_min
df_min['Hour'] = df_min.index.hour
df_min['date'] = df_min.index.date
high_daily = df_min.groupby("date").agg({"High": "max"})
high_daily.reset_index(inplace=True)
print(high_daily.head())

# idxmax gives, per date, the timestamp of the first bar that reaches the
# day's high. That is the same bar the former per-day loop picked, without
# re-filtering the whole minute frame for every date and without integer
# indexing into a datetime-indexed Series.
high_times = df_min.groupby("date")["High"].idxmax()
high_list = pd.to_datetime(high_times).dt.hour.tolist()
#fig = px.histogram(high_list,x=high_list,title='Day High Occurence')
#fig.show()
plt.figure(figsize=(12,8))
sns.distplot(high_list,bins=20,color='g')
plt.gca().set(title='Day High Time analysis', ylabel='Probability ');
plt.show()
Counter(high_list)
import pandas as pd
from collections import Counter
from tqdm import tqdm  # no longer used in this cell; import kept for later cells

# Hour of the day at which each session's low is made.
low_daily = df_min.groupby("date").agg({"Low": "min"})
low_daily.reset_index(inplace=True)
print(low_daily.head())

# idxmin gives, per date, the timestamp of the first bar that reaches the
# day's low. That is the same bar the former per-day loop picked, without
# re-filtering the whole minute frame for every date and without integer
# indexing into a datetime-indexed Series.
low_times = df_min.groupby("date")["Low"].idxmin()
low_list = pd.to_datetime(low_times).dt.hour.tolist()
Counter(low_list)
#fig = px.histogram(low_list,x=low_list)
#fig.show()
plt.figure(figsize=(12,8))
sns.distplot(low_list,bins=20,color='g')
plt.gca().set(title='Day low Time analysis', ylabel='Probability ');
plt.show()
# Summary statistics of the minute-level frame. As a bare expression the
# result is only displayed when it is the last statement of a notebook cell.
df_min.describe()
# Alternative renderer configurations that were tried and left disabled:
#import plotly.io as pio
#pio.renderers.default='notebook'
#import plotly.io as pio
#pio.renderers.default = "notebook_connected"
import plotly.offline as pyo
import plotly.graph_objs as go
# Set notebook mode to work in offline
pyo.init_notebook_mode()