まずは、Visual Studio Codeを起動してプログラムファイルを作成する
Visual Studio Code (VS Code)を起動したら新規ファイル(*.py)を作成して行1-493をコピペします。
ここでは、Jupter NotebookのようにPythonのプログラムをセル単位で実行します。
VS Codeの場合は「#%%」から「#%%」の間がセルになります。
セルを選択したら[Ctrl + Enter」でセルのコードを実行します。
IPythonが起動されて「インタラクティブ」ウィンドウが表示されます。
「インタラクティブ」ウィンドウからはPythonのコードを入力して実行させることができます。
たとえば、「df.info()」を入力して[Shift + Enter」で実行します。
* Article.py
# Daily returns vs Log returns article v81.py (Part 7) : Pandas DataFrame ReSample vs Round (1m,2,,3m,....)
# %%
### Import pandas, matplotlib, plotly libraries
import os
import math
import numpy as np
import datetime as dt
import time
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.offline as offline
import plotly.express as px
import plotly.graph_objs as go
import datetime as dt
from datetime import timedelta
from time import sleep
import requests
import warnings
warnings.simplefilter('ignore')
plt.style.use('fivethirtyeight')
pd.set_option('display.max_rows', 10)
# %%
####################################################################################### Load crypto data from binance
def get_crypto(symbol='BTCUSDT', interval='1d', limit=500, start=None) -> pd.DataFrame:
'''
interval='1d' 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M
limit=500 (default 500, max 1000)
start='2019-01-01 00:00:00' default
'''
url = 'https://api.binance.com/api/v3/klines'
params = {
'symbol': symbol,
'interval': interval,
'limit': str(min(limit, 1000)) # enforce max limit of 1000
}
if start is not None:
# add a new element(startTime) to the params dictionary
params['startTime'] = int(dt.datetime.timestamp(pd.to_datetime(start))*1000) # convert to milliseconds (ms)
try:
print(params)
# {'symbol': 'BTCUSDT', 'interval': '1d', 'limit': '1000'}
# {'symbol': 'BTCUSDT', 'interval': '1m', 'limit': '1000', 'startTime': 1672498800000}
kline = requests.get(url, params=params)
kline.raise_for_status() # raise an error for 4xx and 5xx status codes
kline_json = kline.json()
df = pd.DataFrame(kline_json, columns=['date', 'open', 'high', 'low', 'close', 'volume', 'close_time',
'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
'taker_buy_quote_asset_volume', '_'])
df = df.iloc[:, :6] # only keep the columns for date, OHLC, and volume
df['date'] = pd.to_datetime(df['date'], unit='ms')
# df['date'] = pd.to_datetime(df['date'], unit='ms' if interval in ['1d', '3d', '1w', '1M'] else None)
df = df.set_index('date')
df = df.astype(float)
df['symbol'] = symbol
print(f"get_crypto({symbol=}, {interval=}, {limit=}, {start=}) => {df.shape[0]=}")
return df
except requests.exceptions.HTTPError as e:
print(f"get_crypto({symbol=}, {interval=}, {limit=}, {start=}) HTTP error: {e}")
except Exception as e:
print(f"get_crypto({symbol=}, {interval=}, {limit=}, {start=}) exception error: {e}")
return pd.DataFrame()
############################################ load crypto data from csv file
def get_data(csv_file: str) -> pd.DataFrame:
print(f"Loading data: {csv_file} ")
df = pd.read_csv(csv_file)
# date,open,high,low,close,adj close,volume,symbol
intervals = {'1d', '3d', '1w', '1M'}
found = any(interval in csv_file for interval in intervals)
if found: # '1d', '3d', '1w', '1M'
df['date'] = pd.to_datetime(df['date']) # convert to a datetime
else: # 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h
# df['date'] = pd.to_datetime(df['date'])
df['date'] = pd.to_datetime(df['date'], utc=True) # convert to a timezone-aware UTC-localized Datetime
df.set_index(['date'], inplace=True)
return df
############################################################### Calculate cumulative log return
def calculate_cum_log_return(df: pd.DataFrame) -> pd.DataFrame:
# Calculate log return
df['log_return'] = np.log(df['close'] / df['close'].shift(1))
# Calculate cumulative log return
df['cum_log_return'] = np.exp(df['log_return'].cumsum()) - 1
df['cum_log_return_pct'] = df['cum_log_return'] * 100
# Preview the resulting dataframe
print(f"Cumulative Log Return for {df.iloc[-1]['symbol']} = {df.iloc[-1]['cum_log_return_pct']:.2%}")
return df
#################################
# Main
#################################
### Load the data from binance
symbol = 'BTCUSDT'
interval = '1m' # "1m", "3m", "5m", "15m", "30m", "1h", "2h", "4h", "6h", "8h", "12h", "1d", "3d", "1w", "1M"
limit = 1000
start='now' # now or today, start='2023-02-24 00:00:00'
symbol_list = []
cum_log_return_list = []
csv_file = f"data/csv/binance_crypto({symbol})_{interval}.csv" # data/csv/binance_crypto(BTCUSDT)_1m.csv
isFile = os.path.isfile(csv_file)
if not isFile:
# get_crypto(symbol='BTCUSDT', interval='1d', limit=500, start=None)
if start in 'today, now':
df = get_crypto(symbol, interval, limit)
else:
df = get_crypto(symbol, interval, limit, start)
if not df.empty:
df.to_csv(csv_file, index=True)
# end of if not isFile:
isFile = os.path.isfile(csv_file)
if isFile:
df = get_data(csv_file)
else:
df = pd.DataFrame()
if df.empty:
print(f"Quit the program due to dataframe(df) is empty: {df.empty=}")
quit()
raw_df = df.copy()
raw_df
# %%
FREQUENCY1 = '1T'
FREQUENCY2 = '1H'
FREQUENCY3 = '5H'
### Method1:
df = raw_df.copy()
# resample to 15-minute interval
df1 = df.resample(FREQUENCY1).agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum',
'symbol': 'last'
})
df1
# %%
# df_1T.reset_index(inplace=True)
df1.groupby('symbol').agg(['count', 'sum'])
# df1.groupby('symbol').agg(['min', 'max', 'sum'])
# df1.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY1})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
# resample to 30-minute interval
df2 = df.resample(FREQUENCY2).agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum',
'symbol': 'last'
})
df2
# %%
df2.groupby('symbol').agg(['count', 'sum'])
# df2.groupby('symbol').agg(['min', 'max', 'sum'])
# df2.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df2['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY2})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
# resample to 60-minute interval
df3 = df.resample(FREQUENCY3).agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum',
'symbol': 'last'
})
df3
# %%
df3.groupby('symbol').agg(['count', 'sum'])
# df3.groupby('symbol').agg(['min', 'max', 'sum'])
# df3.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df3['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY3})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['close'], label=f'close {FREQUENCY1}')
plt.plot(df2['close'], label=f'close {FREQUENCY2}')
plt.plot(df3['close'], label=f'close {FREQUENCY3}')
plt.title(f'{symbol} Close Price Comparison')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
### Method2: DataFrame Round()
df1 = raw_df.copy()
df1.reset_index(inplace=True)
# round to 15-minute interval
df1['date'] = df1['date'].round(FREQUENCY1)
# update open, close, low, high, volume
df1['open'] = df1.groupby('date')['open'].transform('first')
df1['close'] = df1.groupby('date')['close'].transform('last')
df1['low'] = df1.groupby('date')['low'].transform('min')
df1['high'] = df1.groupby('date')['high'].transform('max')
df1['volume'] = df1.groupby('date')['volume'].transform('sum')
# drop duplicates
df1.drop_duplicates(subset=['date'], keep='last', inplace=True)
df1.set_index('date', inplace=True)
df1
# %%
# df1.describe
df1.groupby('symbol').agg(['count', 'sum'])
# df1.groupby('symbol').agg(['min', 'max', 'sum'])
# df1.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY1})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
# round to 30-minute interval
df2 = raw_df.copy()
df2.reset_index(inplace=True)
# round to 30-minute interval
df2['date'] = df2['date'].round(FREQUENCY2)
# update open, close, low, high, volume
df2['open'] = df2.groupby('date')['open'].transform('first')
df2['close'] = df2.groupby('date')['close'].transform('last')
df2['low'] = df2.groupby('date')['low'].transform('min')
df2['high'] = df2.groupby('date')['high'].transform('max')
df2['volume'] = df2.groupby('date')['volume'].transform('sum')
# drop duplicates
df2.drop_duplicates(subset=['date'], keep='last', inplace=True)
df2.set_index('date', inplace=True)
df2
# %%
df2.groupby('symbol').agg(['count', 'sum'])
# df2.groupby('symbol').agg(['min', 'max', 'sum'])
# df2.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df2['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY2})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
df3 = raw_df.copy()
df3.reset_index(inplace=True)
# round to 60-minute interval
df3['date'] = df3['date'].round(FREQUENCY3)
# update open, close, low, high, volume
df3['open'] = df3.groupby('date')['open'].transform('first')
df3['close'] = df3.groupby('date')['close'].transform('last')
df3['low'] = df3.groupby('date')['low'].transform('min')
df3['high'] = df3.groupby('date')['high'].transform('max')
df3['volume'] = df3.groupby('date')['volume'].transform('sum')
# drop duplicates
df3.drop_duplicates(subset=['date'], keep='last', inplace=True)
df3.set_index('date', inplace=True)
df3
# %%
df3.groupby('symbol').agg(['count', 'sum'])
# df3.groupby('symbol').agg(['min', 'max', 'sum'])
# df3.groupby('symbol').agg(['min', 'max', 'count'])
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df3['close'], label='close')
plt.title(f'{symbol} Plot Line Chart: Close Price ({FREQUENCY3})')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['close'], label=f'close {FREQUENCY1}')
plt.plot(df2['close'], label=f'close {FREQUENCY2}')
plt.plot(df3['close'], label=f'close {FREQUENCY3}')
plt.title(f'{symbol} Close Price Comparison')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
df1 = calculate_cum_log_return(df1)
df2 = calculate_cum_log_return(df2)
df3 = calculate_cum_log_return(df3)
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['cum_log_return'], label=f'return {FREQUENCY1}')
plt.plot(df2['cum_log_return'], label=f'return {FREQUENCY2}')
plt.plot(df3['cum_log_return'], label=f'return {FREQUENCY3}')
plt.title(f'{symbol} Cumulative Log Return Comparison')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
df1 = calculate_cum_log_return(df1)
df2 = calculate_cum_log_return(df2)
df3 = calculate_cum_log_return(df3)
### Plot Line Chart : Close price
fig = plt.figure(figsize=(12, 6))
plt.plot(df1['cum_log_return'], label=f'return {FREQUENCY1}')
plt.plot(df2['cum_log_return'], label=f'return {FREQUENCY2}')
plt.plot(df3['cum_log_return'], label=f'return {FREQUENCY3}')
plt.title(f'{symbol} Cumulative Log Return Comparison')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
# %%
df1 = df1.replace([np.inf, -np.inf], np.nan)
df1.dropna(axis=0, inplace=True)
df2 = df2.replace([np.inf, -np.inf], np.nan)
df2.dropna(axis=0, inplace=True)
df3 = df3.replace([np.inf, -np.inf], np.nan)
df3.dropna(axis=0, inplace=True)
### Plotly
fig = make_subplots(rows=1, cols=1)
fig.add_trace(go.Candlestick(
x = df1.index,
open = df['open'],
high = df['high'],
low = df['low'],
close = df['close'],
increasing_line_color = 'rgba(107,107,107,0.8)',
decreasing_line_color = 'rgba(210,210,210,0.8)',
name = '1T'),
row=1, col=1)
fig.add_trace(go.Scatter(
x = df2.index,
y = df2['close'],
mode = 'markers',
customdata = df2,
marker_symbol = 'diamond-dot',
marker_size = 13,
marker_line_width = 2,
marker_line_color = 'rgba(0,0,0,0.7)',
marker_color = 'rgba(0,255,0,0.7)',
hovertemplate = 'Date: %{x}<br>' +\
'Close Price: %{y:.2f}<br>' +\
'Return: %{customdata[7]:.2%}',
name = '1H')
)
fig.add_trace(go.Scatter(
x = df3.index,
y = df3['close'],
mode = 'markers',
customdata = df3,
marker_symbol = 'diamond-dot',
marker_size = 13,
marker_line_width = 2,
marker_line_color = 'rgba(0,0,0,0.7)',
marker_color = 'rgba(255,0,0,0.7)',
hovertemplate = 'Date: %{x}<br>' +\
'Close Price: %{y:.2f}<br>' +\
'Return: %{customdata[7]:.2%}',
name = '5H')
)
fig.update_layout(
title=f'{symbol} Cumulative Log Return Comparison (1T, 1H, 5H)',
xaxis_rangeslider_visible=False,
xaxis_title='Date',
yaxis_title='Price')
fig.show()
図1にはVS Codeの画面が表示されています。
次のステップでは「セル」を選択して「セル」単位でPythonのコードを実行します。