まずは、Visual Studio Codeを起動してプログラムファイルを作成する
Visual Studio Code (VS Code)を起動したら新規ファイル(*.py)を作成して行1-353をコピペします。
ここでは、Jupter NotebookのようにPythonのプログラムをセル単位で実行します。
VS Codeの場合は「#%%」から「#%%」の間がセルになります。
セルを選択したら[Ctrl + Enter」でセルのコードを実行します。
IPythonが起動されて「インタラクティブ」ウィンドウが表示されます。
「インタラクティブ」ウィンドウからはPythonのコードを入力して実行させることができます。
たとえば、「df.info()」を入力して[Shift + Enter」で実行します。
* Article.py
# Daily returns vs Log returns article part8.py
# %%
### Import pandas, matplotlib, plotly libraries
import os
import math
import numpy as np
import datetime as dt
from datetime import timedelta
from time import sleep
import pandas as pd
from tabulate import tabulate # pip install tabulate
from prettytable import PrettyTable # pip install prettytable
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.offline as offline
import plotly.express as px
import plotly.graph_objs as go
import requests
import warnings
warnings.simplefilter('ignore')
plt.style.use('fivethirtyeight')
pd.set_option('display.max_rows', 10)
# %%
#################################################### Get all crypto symbols
def get_crypto_symbols(symbol=None) -> pd.DataFrame:
# Define GMO Coin API endpoint
endpoint = 'https://api.coin.z.com/public/v1/ticker'
params = {}
if symbol is not None:
# add a new element(symbol) to the params dictionary
params['symbol'] = symbol
res_dict = requests.get(endpoint, params=params)
dict = res_dict.json()
status = dict.get('status')
# no error ?
if status == 0:
data_list = dict.get('data')
df = pd.DataFrame(data_list) # ask, bid, high, last, low, symbol, timestamp, volume
return df
else:
print(f"get_crypto_symbols() error => {status=}")
return pd.DataFrame()
############################################################ Load data from GMO Coin
def get_crypto(symbol='BTC', interval='1day', start='2018'):
'''
interval=1min 5min 10min 15min 30min 1hour for YYYYMMDD
interval=4hour 8hour 12hour 1day 1week 1month YYYY
start='2018'
'''
url = 'https://api.coin.z.com/public/v1/klines'
params = {
'symbol': symbol,
'interval': interval,
'date': start
}
try:
res_dict = requests.get(url, params=params)
dict = res_dict.json()
status = dict.get('status')
# no error ?
if status == 0:
data_list = dict.get('data')
df = pd.DataFrame(data_list)
df.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
df['date'] = pd.to_datetime(df['date'], unit='ms')
df.set_index('date', inplace=True)
df = df.astype(float)
df['symbol'] = symbol
# df.reset_index(inplace=True)
print(f"get_crypto({symbol=}, {interval=}, {start=}) => {df.shape[0]=}")
return df
else:
print(f"get_crypto({symbol=}, {interval=}, {start=}) error => {status=}")
return pd.DataFrame()
except requests.exceptions.HTTPError as e:
print(f"get_crypto({symbol=}, {interval=}, {start=}) HTTP error: {e}")
except Exception as e:
print(f"get_crypto({symbol=}, {interval=}, {start=}) exception error: {e}")
return pd.DataFrame()
############################################
def get_data(csv_file: str) -> pd.DataFrame:
print(f"Loading data: {csv_file} ")
df = pd.read_csv(csv_file)
# date,open,high,low,close,adj close,volume,symbol
df['date'] = pd.to_datetime(df['date'])
df.set_index(['date'], inplace=True)
return df
###############################################################
def calculate_cum_log_return(df: pd.DataFrame) -> pd.DataFrame:
# Calculate log return
df['log_return'] = np.log(df['close'] / df['close'].shift(1))
# Calculate cumulative log return
df['cum_log_return'] = np.exp(df['log_return'].cumsum()) - 1
df['cum_log_return_pct'] = df['cum_log_return'] * 100
# Preview the resulting dataframe
print(f"Cumulative Log Return for {df.iloc[-1]['symbol']} = {df.iloc[-1]['cum_log_return_pct']:.2%}")
return df
#################################
# Main
#################################
### Load the data from gmo coin
# get all symbols from gmo coin
df = get_crypto_symbols()
symbols = df['symbol'].values.tolist()
len(symbols) # => 26
# %%
interval = '1day'
date_list = ['2020','2021','2022','2023']
symbol_list = []
cum_log_return_list = []
cum_log_return_pct_list = []
for symbol in symbols:
csv_file = f"data/csv/gmo_crypto_2020_2023({symbol})_{interval}.csv"
isFile = os.path.isfile(csv_file)
if not isFile:
for date in date_list: # '2020','2021','2022','2023'
# get_crypto(symbol='BTC', interval='1day', start='2018')
df = get_crypto(symbol, interval, date) # get n rows from starting date
if not df.empty:
df.to_csv(csv_file, index=True)
# end of if not isFile:
isFile = os.path.isfile(csv_file)
if isFile:
df = get_data(csv_file)
if not df.empty:
df = calculate_cum_log_return(df)
df.replace([np.inf, -np.inf], np.nan).dropna(axis=1, inplace=True)
cum_log_return = df.iloc[-1]['cum_log_return']
cum_log_return_pct = df.iloc[-1]['cum_log_return_pct']
symbol_list.append(symbol)
cum_log_return_list.append(cum_log_return)
cum_log_return_pct_list.append(cum_log_return_pct)
# end of for symbol in symbols:
### Create DataFrame from dict
data = {
'symbol': symbol_list,
'cum_log_return': cum_log_return_list,
'cum_log_return_pct': cum_log_return_pct_list
}
raw_df = pd.DataFrame(data)
if raw_df.empty:
print(f"Quit the program due to raw_df is empty: {raw_df.empty=}")
quit()
raw_df
# %%
### Replace np.inf or -np.inf (positive or negative infinity) with np.nan(Not A Number)
df = raw_df.replace([np.inf, -np.inf], np.nan)
df.isnull().sum()
### Drop rows if np.nan (Not A Number)
df.dropna(axis=0, inplace=True)
df.isnull().sum()
# %%
### Print using tabulate (tablefmt='orgtbl') : numalign='right', floatfmt='.2f'
TABLEFMT = 'orgtbl'
# TABLEFMT = 'github'
best_df = df.nlargest(10, 'cum_log_return_pct')
worst_df = df.nsmallest(10, 'cum_log_return_pct')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
# Convert best_df and worst_df to lists
best_list = best_df[['symbol', 'cum_log_return_pct']].values.tolist()
worst_list = worst_df[['symbol', 'cum_log_return_pct']].values.tolist()
# Add index column to each table
best_list = [[i] + row for i, row in enumerate(best_list, 1)]
worst_list = [[i] + row for i, row in enumerate(worst_list, 1)]
# Print tables using tabulate
print(f'Top 10 Cryptocurrencies by Cumulative Log Return (%): {TABLEFMT}')
print('-'*44)
print(tabulate(best_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt='orgtbl')) # orgtbl, github
print('-'*44)
print()
print(f'Bottom 10 Cryptocurrencies by Cumulative Log Return (%): {TABLEFMT}')
print('-'*44)
print(tabulate(worst_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt='orgtbl')) # orgtbl, github
print('-'*44)
# %%
### Print using tabulate (tablefmt='github') : numalign='right', floatfmt='.2f'
# TABLEFMT = 'orgtbl'
TABLEFMT = 'github'
best_df = df.nlargest(df.shape[0], 'cum_log_return_pct')
worst_df = df.nsmallest(df.shape[0], 'cum_log_return_pct')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
# Convert best_df and worst_df to lists
best_list = best_df[['symbol', 'cum_log_return_pct']].values.tolist()
worst_list = worst_df[['symbol', 'cum_log_return_pct']].values.tolist()
# Add index column to each table
best_list = [[i] + row for i, row in enumerate(best_list, 1)]
worst_list = [[i] + row for i, row in enumerate(worst_list, 1)]
# Print tables using tabulate
print(f'Cryptocurrencies traded on GMO Coin exchange by Descending Order of Cumulative Log Return (%): {TABLEFMT}')
print('-'*44)
print(tabulate(best_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt=TABLEFMT))
print('-'*44)
# %%
print()
print(f'Cryptocurrencies traded on GMO exchange by Ascending Order of Cumulative Log Return (%) : {TABLEFMT}')
print('-'*44)
print(tabulate(worst_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt=TABLEFMT))
print('-'*44)
# %%
### Print using PrettyTable() : with align = 'r' and float format
# Web Link: https://pypi.org/project/prettytable/
best_df = df.nlargest(10, 'cum_log_return_pct')
worst_df = df.nsmallest(10, 'cum_log_return_pct')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
# Convert best_df and worst_df to lists
best_list = best_df[['symbol', 'cum_log_return_pct']].values.tolist()
worst_list = worst_df[['symbol', 'cum_log_return_pct']].values.tolist()
# Add index column to each table
best_list = [[i] + row for i, row in enumerate(best_list, 1)]
worst_list = [[i] + row for i, row in enumerate(worst_list, 1)]
# Create PrettyTable objects
best_table = PrettyTable()
worst_table = PrettyTable()
# Set column names for tables
best_table.field_names = ['Rank', 'Symbol', 'Cumulative Log Return']
worst_table.field_names = ['Rank', 'Symbol', 'Cumulative Log Return']
# Add data to tables
for i, row in enumerate(best_list, 1):
# best_table.add_row([i] + row[1:])
best_table.add_row([i] + row[1:2] + ["{:.4f}".format(row[2])])
for i, row in enumerate(worst_list, 1):
# worst_table.add_row([i] + row[1:])
worst_table.add_row([i] + row[1:2] + ["{:.4f}".format(row[2])])
# Align float values to the right
best_table.align['Cumulative Log Return'] = 'r'
worst_table.align['Cumulative Log Return'] = 'r'
# Print tables
print('Top 10 Cryptocurrencies by Cumulative Log Return (%): PrettyTable')
print(best_table)
print()
print('Bottom 10 Cryptocurrencies by Cumulative Log Return (%): PrettyTable')
print(worst_table)
# %%
### Print using tabulate : tablefmt='rounded_grid', numalign='right', floatfmt='.2f'
# Web link: https://pypi.org/project/tabulate/
# TABLEFMT = 'simple_grid'
# TABLEFMT = 'rounded_grid'
# TABLEFMT = 'fancy_grid'
# TABLEFMT = 'fancy_outline'
TABLEFMT = 'rounded_outline'
best_df = df.nlargest(10, 'cum_log_return_pct')
worst_df = df.nsmallest(10, 'cum_log_return_pct')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
# Convert best_df and worst_df to lists
best_list = best_df[['symbol', 'cum_log_return_pct']].values.tolist()
worst_list = worst_df[['symbol', 'cum_log_return_pct']].values.tolist()
# Add index column to each table
best_list = [[i] + row for i, row in enumerate(best_list, 1)]
worst_list = [[i] + row for i, row in enumerate(worst_list, 1)]
# Print tables using tabulate
print(f'Top 10 Cryptocurrencies by Cumulative Log Return (%): {TABLEFMT}')
print(tabulate(best_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt=TABLEFMT))
print()
print(f'Bottom 10 Cryptocurrencies by Cumulative Log Return (%): {TABLEFMT}')
print(tabulate(worst_list,
headers=['#', 'Symbol', 'Cumulative Log Return'],
numalign='right',
floatfmt='.2f',
tablefmt=TABLEFMT))
図1にはVS Codeの画面が表示されています。
次のステップでは「セル」を選択して「セル」単位でPythonのコードを実行します。