まずは、Visual Studio Codeを起動してプログラムファイルを作成する
Visual Studio Code (VS Code)を起動したら新規ファイル(*.py)を作成して行1-188をコピペします。
ここでは、Jupyter NotebookのようにPythonのプログラムをセル単位で実行します。
VS Codeの場合は「#%%」から「#%%」の間がセルになります。
セルを選択したら[Ctrl + Enter]でセルのコードを実行します。
IPythonが起動されて「インタラクティブ」ウィンドウが表示されます。
「インタラクティブ」ウィンドウからはPythonのコードを入力して実行させることができます。
たとえば、「df.info()」を入力して[Shift + Enter]で実行します。
* Article.py:
# Comparing the Profitability of Multiple Cryptocurrencies Article.py
# %%
### Import pandas and matplotlib libraries
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
from datetime import timedelta
from time import sleep
import yfinance as yf
import warnings
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')
# Apply matplotlib's 'fivethirtyeight' style sheet to all figures drawn below.
plt.style.use('fivethirtyeight')
# Truncate DataFrame output once it exceeds 10 rows.
pd.set_option('display.max_rows', 10)
# %%
######################################################################################################################################
def load_data(symbol: str, start_date: dt.datetime, end_date: dt.datetime, period='1d', interval='1d', prepost=True) -> pd.DataFrame:
    """Download price history for one ticker with ``yf.download``.

    The returned frame has a fresh integer index, lower-cased column names,
    a timestamp column that is always called ``date`` (whether yfinance
    labelled it ``Date`` or ``Datetime``) and an added ``symbol`` column.

    Args:
        symbol: Ticker to download, e.g. ``'BTC-JPY'``.
        start_date: First day to request.
        end_date: Last day wanted. One day is added before the request is
            sent so that this day itself is part of the requested window.
        period: Accepted for backward compatibility and echoed in the log
            line only. ``start``/``end`` are always supplied, so a period
            would be redundant.
        interval: Bar size, one of
            1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo.
        prepost: Forwarded unchanged to ``yf.download``.

    Returns:
        The downloaded frame, or an empty ``DataFrame`` when anything went
        wrong (the error is printed, never raised).
    """
    try:
        end_date = end_date + timedelta(days=1)
        start_date_str = start_date.strftime("%Y-%m-%d")
        end_date_str = end_date.strftime("%Y-%m-%d")
        print(f"Loading data for {symbol}: start_date={start_date_str}, end_date={end_date_str}, {period=}, {interval=}")
        # NOTE(review): `period` is deliberately not forwarded. Older yfinance
        # releases ignore it when `start` is given; newer ones appear to
        # reject period+start+end together - confirm against the installed
        # version.
        df = yf.download(symbol, start=start_date_str, end=end_date_str, interval=interval, prepost=prepost)
        # Some yfinance versions return (field, ticker) MultiIndex columns even
        # for a single ticker; keep only the field level so the string
        # operations below work.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        df['symbol'] = symbol
        df = df.reset_index()
        # Intraday downloads label the timestamp 'Datetime', daily-and-longer
        # ones 'Date'; unify on 'Date' (a no-op when it is already 'Date').
        df = df.rename(columns={'Datetime': 'Date'})
        df.columns = df.columns.str.lower()
        return df
    except Exception as exc:
        # Best effort by design: report the cause and hand back an empty
        # frame so the caller can decide what to do.
        print(f'Error loading data for {symbol}: {exc!r}')
        return pd.DataFrame()
############################################
def get_data(csv_file: str) -> pd.DataFrame:
    """Read a cached price CSV and return it indexed by its ``date`` column.

    Args:
        csv_file: Path of a CSV file that contains a ``date`` column.

    Returns:
        The file's contents with ``date`` parsed to datetimes and used as
        the index.
    """
    print(f"Loading data: {csv_file} ")
    frame = pd.read_csv(csv_file)
    # Expected columns: date,open,high,low,close,adj close,volume,symbol
    # (pass utc=True to pd.to_datetime if timezone-aware stamps are needed)
    parsed_dates = pd.to_datetime(frame['date'])
    frame['date'] = parsed_dates
    return frame.set_index(['date'])
##############################
# Main
##############################
### Load the crypto data from yahoo finance (one cached CSV per symbol)
symbols = ['BTC-JPY', 'ETH-JPY','LTC-JPY']
# Alternative symbol sets:
# symbols = ['BTC-JPY', 'ETH-JPY', 'LTC-JPY','XRP-JPY','BCH-JPY']
# symbols = ['BTC-JPY', 'ETH-JPY','LTC-JPY','ADA-JPY']
# symbols = ['MATIC-JPY']
# symbols = ['BTC-JPY', 'ETH-JPY','LTC-JPY','ADA-JPY','MATIC-JPY']
# symbols = ['BTC-JPY', 'ETH-JPY', 'LTC-JPY','XRP-JPY','BCH-JPY','ADA-JPY','DOGE-JPY','MATIC-JPY']
# symbols = tickers = ['GOOGL', 'AAPL', 'MSFT','META','AMZN']
interval = '1d'     # 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# Exact-match membership; a substring test against a comma-joined string
# would also accept values such as '' or 'm'.
INTRADAY_INTERVALS = ('1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h')
df_list = []
loaded_symbols = []
for symbol in symbols:
    csv_file = f"data/csv/log_return({symbol})_{interval}.csv"  # e.g. data/csv/log_return(BTC-JPY)_1d.csv
    # Download only when there is no cached CSV for this symbol/interval yet.
    if not os.path.isfile(csv_file):
        end = dt.datetime.now()
        if interval in INTRADAY_INTERVALS:
            # Intraday bars: request only the last 7 days.
            start = end - timedelta(days=7)
        else:  # interval=1d,5d,1wk,1mo,3mo
            start = dt.datetime(2014,1,1)  # 2014,1,1 or 2020,1,1 or 2023,1,1
        df = load_data(symbol, start, end, period='1d', interval=interval)
        if df.shape[0] == 0:
            # Nothing was downloaded, so no CSV exists to read back; skip
            # this symbol instead of failing on the missing file.
            print(f"No data downloaded for {symbol}; skipping")
            continue
        # Make sure the cache directory exists before writing into it.
        os.makedirs(os.path.dirname(csv_file), exist_ok=True)
        df.to_csv(csv_file, index=False)
    # Always read back from the CSV so fresh and cached runs look the same.
    df = get_data(csv_file)
    df_list.append(df)
    loaded_symbols.append(symbol)
if not df_list:
    raise RuntimeError("No price data could be loaded for any symbol")
# Later cells iterate over `symbols`; keep it in sync with what was loaded.
symbols = loaded_symbols
row_df = pd.concat(df_list)
# %%
### Keep only the close and symbol columns; move `date` from the index to a column
df = row_df.filter(items=['close', 'symbol']).reset_index()
# df
# %%
# Per-symbol date coverage: first date, last date and number of rows
dates_by_symbol = df.groupby('symbol')['date']
dates_by_symbol.agg(['min', 'max', 'count'])
# %%
### Pivot to one close column per symbol, one row per date
wide_close = df.pivot_table(values=['close'], index=['date'], columns='symbol')
# Drop every date on which at least one symbol has no close value
pivot_df = wide_close.dropna()
# Cell output: remaining missing values per column
pivot_df.isna().sum()
# pivot_df
# %%
#### Flatten the two-level column index; `date` stays the dataframe index
# Each column key is a (value_name, symbol) pair, e.g. ('close', 'BTC-JPY');
# keep only the symbol part => ['BTC-JPY', 'ETH-JPY', 'LTC-JPY']
pivot_df.columns = [ticker for _, ticker in pivot_df.columns]
# pivot_df
# %%
### Calculate log return & cumulative log return
dfx = pivot_df.copy()
# One pair of output column names per symbol, in the same order as `symbols`
log_col_name_list = [f'log_return_{ticker}' for ticker in symbols]
cum_col_name_list = [f'cum_log_return_{ticker}' for ticker in symbols]
for symbol, log_col_name, cum_col_name in zip(symbols, log_col_name_list, cum_col_name_list):
    prices = dfx[symbol]
    # Log return between consecutive rows: ln(price_t / price_{t-1})
    dfx[log_col_name] = np.log(prices / prices.shift(1))
    # exp(running sum of log returns) - 1 turns the sum back into a cumulative return
    dfx[cum_col_name] = np.exp(dfx[log_col_name].cumsum()) - 1
    # Report the value on the last row
    print(f"Cumulative Log Return ({symbol}) = {dfx[cum_col_name].iloc[-1]:.4f}")
# The first row has no previous price, so its returns are NaN; drop such rows
dfx = dfx.dropna()
dfx.isna().sum()
# %%
### Plot Cumulative Log Returns (one line per symbol)
# Exact-match membership; a substring test against a comma-joined string
# would also accept values such as '' or 'm'.
intraday_intervals = ('1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h')
plt.figure(figsize=(10,5))
if interval in intraday_intervals:
    # Intraday data: one major tick per day, labelled month/day
    axis = plt.gca()
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
    axis.xaxis.set_major_locator(mdates.DayLocator())
    plt.gcf().autofmt_xdate()
plt.title(f'Performance: Cumulative Log Returns - Interval({interval.upper()})', fontsize=18)
# plt.yscale('log')
# cum_col_name_list is parallel to symbols, so pair them up directly
for symbol, cum_col_name in zip(symbols, cum_col_name_list):
    plt.plot(dfx[cum_col_name], label=symbol)
plt.xlabel('Date')
plt.ylabel('Cumulative log returns')
plt.xticks(rotation=45)
plt.legend(loc='best')
plt.show()
図1にはVS Codeの画面が表示されています。 次のステップでは「セル」を選択して「セル」単位でPythonのコードを実行します。