まずは、Visual Studio Codeを起動してプログラムファイルを作成する
Visual Studio Code (VS Code)を起動したら新規ファイル(*.py)を作成して行1-221をコピペします。
ここでは、Jupter NotebookのようにPythonのプログラムをセル単位で実行します。
VS Codeの場合は「#%%」から「#%%」の間がセルになります。
セルを選択したら[Ctrl + Enter」でセルのコードを実行します。
IPythonが起動されて「インタラクティブ」ウィンドウが表示されます。
「インタラクティブ」ウィンドウからはPythonのコードを入力して実行させることができます。
たとえば、「df.info()」を入力して[Shift + Enter」で実行します。
* Article.py:
# Comparing the Profitability of Multiple Cryptocurrencies Article.py
# %%
### Import pandas and matplotlib libraries
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.offline as offline
import plotly.express as px
import plotly.graph_objs as go
import datetime as dt
from datetime import timedelta
from time import sleep
import yfinance as yf
import warnings
warnings.simplefilter('ignore')
plt.style.use('fivethirtyeight')
pd.set_option('display.max_rows', 10)
# %%
######################################################################################################################################
def load_data(symbol: str, start_date: dt.datetime , end_date: dt.datetime, period='1d', interval='1d', prepost=True) -> pd.DataFrame:
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
try:
end_date = end_date + timedelta(days=1)
start_date_str = dt.datetime.strftime(start_date, "%Y-%m-%d")
end_date_str = dt.datetime.strftime(end_date, "%Y-%m-%d")
print(f"Loading data for {symbol}: start_date={start_date_str}, end_date={end_date_str}, {period=}, {interval=}")
df = yf.download(symbol, start=start_date_str, end=end_date_str, period=period, interval=interval, prepost=prepost)
# Date Open High Low Close Adj Close Volume Symbol : interval=1d,5d,1wk,1mo,3mo
# Datetime Open High Low Close Adj Close Volume Symbol : interval=1m,2m,5m,15m,30m,60m,90m,1h
# Add symbol
df['symbol'] = symbol
# Reset index
df.reset_index(inplace=True)
# Rename Date or Datetime column name to Time
if interval in '1m,2m,5m,15m,30m,60m,90m,1h':
df.rename(columns={'Datetime': 'Date'}, inplace=True)
else: # interval=1d,5d,1wk,1mo,3mo
df.rename(columns={'Date': 'Date'}, inplace=True)
# Convert column names to lower case
df.columns = df.columns.str.lower()
return df
except:
print('Error loading data for ' + symbol)
return pd.DataFrame()
############################################
def get_data(csv_file: str) -> pd.DataFrame:
print(f"Loading data: {csv_file} ")
df = pd.read_csv(csv_file)
# date,open,high,low,close,adj close,volume,symbol
df['date'] = pd.to_datetime(df['date'])
df.set_index(['date'], inplace=True)
return df
##############################
# Main
##############################
### Load the crypto data from yahoo finance
symbols = ['BTC-USD', 'ETH-USD','LTC-USD']
# symbols = ['OP-USD']
# symbols = ['HEX-USD']
# symbols = ['TWT-USD', 'AAVE-USD', 'RPL-USD']
# symbols = ['MATIC-USD', 'AXS-USD']
# symbols = ['FMT-USD', 'DOGE-USD', 'TRAC-USD']
interval = '1d' # 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
df_list = []
for symbol in symbols:
csv_file = f"data/csv/compare_crypto_20180101({symbol})_{interval}.csv" # data/csv/compare_crypto_yyyymdd(BTC_USD)_1d.csv
isFile = os.path.isfile(csv_file)
if not isFile:
if interval in '1m,2m,5m,15m,30m,60m,90m,1h':
end = dt.datetime.now()
start = end - timedelta(days=7)
else: # interval=1d,5d,1wk,1mo,3mo
start = dt.datetime(2018,1,1) # 2018,1,1 or 2020,1,1 or 2023,1,1 or 2023,2,1
end = dt.datetime.now()
# load_data(symbol: str, start_date: dt.datetime , end_date: dt.datetime, period='1d', interval={'1m'|'1d'}, prepost=True) -> pd.DataFrame:
df = load_data(symbol, start, end, period='1d', interval=interval)
if df.shape[0] > 0:
df.to_csv(csv_file, index=False)
# end of if not isFile:
df = get_data(csv_file)
df_list.append(df)
raw_df = pd.concat(df_list)
raw_df.dropna(inplace=True)
raw_df.isnull().sum()
# %%
### Filter close, symbol columns
close_df = raw_df.filter(['close','symbol'])
close_df.reset_index(inplace=True)
close_df.groupby('symbol')['date'].agg(['min', 'max', 'count'])
# %%
### pivot table
pivot_df = close_df.pivot_table(index=['date'], columns='symbol', values=['close'])
pivot_df.dropna(inplace=True)
pivot_df.isnull().sum()
# pivot_df
# %%
#### flatten columns multi-index, `date` will become the dataframe index
# col[0] col[1]
# pivot_df.columns.values => array([('close', 'BTC-USD'), ('close', 'ETH-USD'), ('close', 'LTC-USD')])
pivot_df.columns = [col[1] for col in pivot_df.columns.values] # ['BTC-USD', 'ETH-USD', 'LTC-USD']
# pivot_df
# %%
### Calculate log return & cumulative log return
cum_df = pivot_df.copy()
log_col_name_list = []
cum_col_name_list = []
for symbol in symbols: # BTC-USD, ETH-USD, LTC-USD
coin = symbol
log_col_name = f'log_return_{coin}' # log_return_xxx,...
log_col_name_list.append(log_col_name)
# Calculate log return
cum_df[log_col_name] = np.log(cum_df[symbol] / cum_df[symbol].shift(1))
# cum_df['log_return_xxx'] = np.log(cum_df['xxx'] / cum_df['xxx'].shift(1))
# Calculate cumulative log return
cum_col_name = f'cum_log_return_{coin}' # cumulative_log_return_btc
cum_col_name_list.append(cum_col_name)
cum_df[cum_col_name] = np.exp(cum_df[log_col_name].cumsum()) - 1
# cum_df['cum_log_return_xxx'] = np.exp(cum_df['log_return_xxx'].cumsum()) - 1
# Preview the resulting dataframe
print(f"Cumulative Log Return ({symbol}) = {cum_df.iloc[-1][cum_col_name]:.4f}")
# print(f"Cumulative Log Return (xxx) = {cum_df.iloc[-1]['cum_log_return_xxx']:.4f}")
### Replace np.inf or -np.inf (positive or negative infinity) with np.nan(Not A Number)
cum_df.replace([np.inf, -np.inf], np.nan, inplace=True)
cum_df.isnull().sum()
### Drop rows if np.nan (Not A Number)
cum_df.dropna(inplace=True)
cum_df.isnull().sum()
# %%
### Transforming the cumulative returns data for plotting.
cum2_df = cum_df.reset_index()
for i, symbol in enumerate(symbols): # BTC-USD, ETH-USD, LTC-USD
# replace close price with cumulative log return
cum2_df[symbol] = cum2_df[cum_col_name_list[i]]
# cum2_df['BTC-USD'] = cum2_df['cum_log_return_btc']
# %%
#### Drop coumuns : log_return_btc,cum_log_return_btc,log_return_eth,cum_log_return_eth,log_return_ltc,cum_log_return_ltc
column_list = cum2_df.columns.to_list()
# column_list
# column_list[len(symbols)+1:]
drop_column_list = column_list[len(symbols)+1:]
cum3_df = cum2_df.drop(drop_column_list, axis=1)
# cum3_df
# %%
cum4_df = cum3_df.melt(id_vars=['date'], var_name='symbol', value_name='cum_log_return')
cum4_df['cum_log_return_pct'] = cum4_df['cum_log_return'] * 100
# cum4_df
#%%
# Plot line
fig = px.line(
cum4_df,
x='date', y='cum_log_return_pct', # log_y=True,
color='symbol',
labels={'cum_log_return_pct':'cumulative log returns (%)', }
)
# Add chart title and axis labels
fig.update_layout(
title=f'Performance - Cumulative Log Returns: Interval({interval.upper()})',
xaxis_rangeslider_visible=True, # Set True or False
xaxis_title='Date'
)
fig.show()
図1にはVS Codeの画面が表示されています。
次のステップでは「セル」を選択して「セル」単位でPythonのコードを実行します。