まずは、Visual Studio Codeを起動してプログラムファイルを作成する
Visual Studio Code (VS Code)を起動したら新規ファイル(*.py)を作成して行1-567をコピペします。
ここでは、Jupter NotebookのようにPythonのプログラムをセル単位で実行します。
VS Codeの場合は「#%%」から「#%%」の間がセルになります。
セルを選択したら[Ctrl + Enter」でセルのコードを実行します。
IPythonが起動されて「インタラクティブ」ウィンドウが表示されます。
「インタラクティブ」ウィンドウからはPythonのコードを入力して実行させることができます。
たとえば、「df.info()」を入力して[Shift + Enter」で実行します。
* Article.py:
# Daily returns vs Log returns article (Part4)
# %%
### Import pandas, matplotlib, plotly libraries
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.offline as offline
import plotly.express as px
import plotly.graph_objs as go
import datetime as dt
from datetime import timedelta
from time import sleep
import yfinance as yf
from yahooquery import Screener # pip install yahooquery
import warnings
warnings.simplefilter('ignore')
plt.style.use('fivethirtyeight')
pd.set_option('display.max_rows', 10)
# %%
######################################################################################################################################
def load_data(symbol: str, start_date: dt.datetime , end_date: dt.datetime, period='1d', interval='1d', prepost=True) -> pd.DataFrame:
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
try:
end_date = end_date + timedelta(days=1)
start_date_str = dt.datetime.strftime(start_date, "%Y-%m-%d")
end_date_str = dt.datetime.strftime(end_date, "%Y-%m-%d")
print(f"Loading data for {symbol}: start_date={start_date_str}, end_date={end_date_str}, {period=}, {interval=}")
df = yf.download(symbol, start=start_date_str, end=end_date_str, period=period, interval=interval, prepost=prepost)
# Date Open High Low Close Adj Close Volume Symbol : interval=1d,5d,1wk,1mo,3mo
# Datetime Open High Low Close Adj Close Volume Symbol : interval=1m,2m,5m,15m,30m,60m,90m,1h
# Add symbol
df['symbol'] = symbol
# Reset index
df.reset_index(inplace=True)
# Rename Date or Datetime column name to Time
if interval in '1m,2m,5m,15m,30m,60m,90m,1h':
df.rename(columns={'Datetime': 'Date'}, inplace=True)
else: # interval=1d,5d,1wk,1mo,3mo
df.rename(columns={'Date': 'Date'}, inplace=True)
# Convert column names to lower case
df.columns = df.columns.str.lower()
return df
except Exception as e:
print(f"load_data({symbol}) exception error: {str(e)}")
return pd.DataFrame()
############################################
def get_data(csv_file: str) -> pd.DataFrame:
print(f"Loading data: {csv_file} ")
df = pd.read_csv(csv_file)
# date,open,high,low,close,adj close,volume,symbol
df['date'] = pd.to_datetime(df['date'])
df.set_index(['date'], inplace=True)
return df
###############################################################
def calculate_cum_log_return(df: pd.DataFrame) -> pd.DataFrame:
# Calculate log return
df['log_return'] = np.log(df['close'] / df['close'].shift(1))
# Calculate cumulative log return
df['cum_log_return'] = np.exp(df['log_return'].cumsum()) - 1
df['cum_log_return_pct'] = df['cum_log_return'] * 100
# Preview the resulting dataframe
print(f"Cumulative Log Return for {df.iloc[-1]['symbol']} = {df.iloc[-1]['cum_log_return_pct']:.2%}")
return df
##############################
# Main
##############################
### Load crypto symbols from yahoo finance
s = Screener()
# s.available_screeners
data = s.get_screeners('all_cryptocurrencies_us', count=250) # max=250
# data is in the quotes key
dicts = data['all_cryptocurrencies_us']['quotes']
symbols = [d['symbol'] for d in dicts]
print(f"{len(symbols)=}") # 250 coins
# symbols
# %%
### Load the crypto data from yahoo finance
interval = '1d' # 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
symbol_list = []
cum_log_return_list = []
cum_log_return_pct_list = []
for symbol in symbols:
csv_file = f"data/csv/all_cryptocurrencies({symbol})_{interval}.csv" # data/csv/all_cryptocurrencies(BTC_USD)_1d.csv
isFile = os.path.isfile(csv_file)
if not isFile:
if interval in '1m,2m,5m,15m,30m,60m,90m,1h':
end = dt.datetime.now()
start = end - timedelta(days=7)
else: # interval=1d,5d,1wk,1mo,3mo
start = dt.datetime(2020,1,1) # 2014,1,1 or 2020,1,1 or 2023,1,1
end = dt.datetime.now()
df = load_data(symbol, start, end, period='1d', interval=interval)
if not df.empty:
df.to_csv(csv_file, index=False)
else:
symbols.remove(symbol)
# end of if not isFile:
isFile = os.path.isfile(csv_file)
if isFile:
df = get_data(csv_file)
print(f"{csv_file=}, {df.shape}")
if not df.empty:
df = calculate_cum_log_return(df)
df.replace([np.inf, -np.inf], np.nan).dropna(axis=1, inplace=True)
cum_log_return = df.iloc[-1]['cum_log_return']
cum_log_return_pct = df.iloc[-1]['cum_log_return_pct']
symbol_list.append(symbol)
cum_log_return_list.append(cum_log_return)
cum_log_return_pct_list.append(cum_log_return_pct)
# end of for symbol in symbols:
# print(symbol_list)
# print(cum_log_return_list)
# print(cum_log_return_pct_list)
# %%
### Create DataFrame from dict
data = {
'symbol': symbol_list,
'cum_log_return': cum_log_return_list,
'cum_log_return_pct': cum_log_return_pct_list
}
raw_df = pd.DataFrame(data)
if raw_df.empty:
print(f"Quit the program due to raw_df is empty: {raw_df.empty=}")
quit()
# %%
df = raw_df.copy()
df = df.drop(columns=['symbol'])
# Printing the count of not a number values
c = int(np.isnan(df).values.sum())
print("It contains " + str(c) + " not a number values")
# Printing the count of infinity values
c = int(np.isinf(df).values.sum())
print("It contains " + str(c) + " infinite values")
# %%
# Printing column name where not a number is present
print('-'*60)
print("Printing column name where not a number is present")
col_name = df.columns.to_series()[np.isnan(df).any()]
print(col_name)
# Printing column name where infinity is present
print('-'*60)
print("Printing column name where infinity is present")
col_name = df.columns.to_series()[np.isinf(df).any()]
print(col_name)
# %%
# Printing row index with not a number
print("Printing row index with not a number ")
r = df.index[np.isnan(df).any(1)]
print(r) # Int64Index([115, 168, 193, 220], dtype='int64')
# print('-'*50)
# print(raw_df.loc[115])
# print('-'*50)
# print(raw_df.loc[168])
# print('-'*50)
# print(raw_df.loc[193])
# print('-'*50)
# print(raw_df.loc[220])
# %%
# Printing row index with infinity
print("Printing row index with infinity ")
r = df.index[np.isinf(df).any(1)]
print(r) # Int64Index([13, 133], dtype='int64')
# print('-'*50)
# print(raw_df.loc[13])
# print('-'*50)
# print(raw_df.loc[133])
# %%
### Replace np.inf or -np.inf (positive or negative infinity) with np.nan(Not A Number)
df = df.replace([np.inf, -np.inf], np.nan)
df.isnull().sum()
# %%
### Drop rows if np.nan (Not A Number)
df.dropna(axis=0, inplace=True)
df.isnull().sum()
# %%
df = raw_df.copy()
df = df.replace([np.inf, -np.inf], np.nan)
df.dropna(axis=0, inplace=True)
raw2_df = df.copy()
### Print Top or Bottom 10 Cryptocurrencies by Cumulative Log Return : Reset index and add 1 to each index
best_df = df.nlargest(10, 'cum_log_return')
worst_df = df.nsmallest(10, 'cum_log_return')
best_df.reset_index(drop=True, inplace=True)
worst_df.reset_index(drop=True, inplace=True)
# Add 1 to each index
best_df.index = best_df.index + 1
worst_df.index = worst_df.index + 1
print('Top 10 Cryptocurrencies by Cumulative Log Return')
print('-'*60)
print(best_df)
print()
print('Bottom 10 Cryptocurrencies by Cumulative Log Return')
print('-'*60)
print(worst_df)
# %%
### Print Top or Bottom 10 Cryptocurrencies by Cumulative Log Return : df.iterrows()
best_df = df.nlargest(10, 'cum_log_return_pct')
worst_df = df.nsmallest(10, 'cum_log_return_pct')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
# Add 1 to each index
best_df.index = best_df.index + 1
worst_df.index = worst_df.index + 1
print('Top 10 Cryptocurrencies by Cumulative Log Return (%)')
print('-'*60)
for i, row in best_df.iterrows():
print(f"{i}: {row['symbol']} \t cumulative log return: {row['cum_log_return_pct']:.2%}")
print()
print('Bottom 10 Cryptocurrencies by Cumulative Log Return (%)')
print('-'*60)
for i, row in worst_df.iterrows():
print(f"{i}: {row['symbol']} \t cumulative log return: {row['cum_log_return_pct']:.2%}")
# %%
### Find the specific coin's rank
best_df = df.nlargest(df.shape[0], 'cum_log_return')
worst_df = df.nsmallest(df.shape[0], 'cum_log_return')
best_df.reset_index(inplace=True)
worst_df.reset_index(inplace=True)
coin = 'BTC-USD' # BTC-USD, ETH-USD, LTC-USD, MATIC-USD
find_coin = best_df['symbol'] == coin
found_df = best_df[find_coin]
if found_df.shape[0]:
print(f"The {coin} coin is ranked {found_df.index.values[0]+1}th.")
# %%
### Plot 10 best crypto : Matplotlib version
# Sort cumulative log return in descending order
best_df = df.sort_values('cum_log_return_pct', ascending=True)
best10_df = best_df.tail(10)
best10_df.reset_index(inplace=True)
# Draw bars using dataframe.iterrows()
plt.figure(figsize=(10,6))
plt.title('Top 10 Cryptocurrencies by Cumulative Log Return (%)')
for _, row in best10_df.iterrows():
bars = plt.barh(row['symbol'], row['cum_log_return_pct'])
plt.bar_label(bars)
plt.grid(False)
plt.xlabel('Cumulative Log Returns (%)')
plt.ylabel('Symbol')
plt.tight_layout()
plt.show()
# %%
# Sort cumulative log return in descending order
best_df = df.sort_values('cum_log_return_pct', ascending=True)
best10_df = best_df.tail(10)
best10_df.reset_index(inplace=True)
# Draw bars with colors
plt.figure(figsize=(10,6))
plt.title('Top 10 Cryptocurrencies by Cumulative Log Return (%)')
colors = ['tab:gray', 'tab:orange', 'tab:brown', 'tab:green', 'tab:blue',
'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan', 'tab:red']
bars = plt.barh(best10_df['symbol'], best10_df['cum_log_return_pct'], color=colors)
plt.bar_label(bars, fmt='%.2f%%')
plt.grid(False)
plt.xlabel('Cumulative Log Returns (%)')
plt.ylabel('Symbol')
plt.tight_layout()
plt.show()
# %%
### Plot 10 worst 10 crypto : Matplotlib version
# Sort cumulative log return in descending order
worst_df = df.sort_values('cum_log_return_pct', ascending=False)
worst10_df = worst_df.tail(10)
worst10_df.reset_index(inplace=True)
worst10_df['abs_cum_log_return_pct'] = worst10_df['cum_log_return_pct'].apply(lambda x: abs(x))
worst10_df = worst10_df.sort_values('abs_cum_log_return_pct', ascending=False)
worst10_df.reset_index(inplace=True)
# Draw bras using dataframe iterrow()
plt.figure(figsize=(16,8))
plt.title('Bottom 10 Cryptocurrencies by Cumulative Log Return (%)')
for _, row in worst10_df.iterrows():
bars = plt.barh(row['symbol'], -row['abs_cum_log_return_pct'])
plt.bar_label(bars)
plt.grid(False)
plt.xlabel('Cumulative Log Returns (%)')
plt.ylabel('Symbol')
plt.tight_layout()
plt.show()
# %%
# Sort cumulative log return in descending order
worst_df = df.sort_values('cum_log_return_pct', ascending=False)
worst10_df = worst_df.tail(10)
worst10_df.reset_index(inplace=True)
worst10_df['abs_cum_log_return_pct'] = worst10_df['cum_log_return_pct'].apply(lambda x: abs(x))
worst10_df = worst10_df.sort_values('abs_cum_log_return_pct', ascending=False)
worst10_df.reset_index(inplace=True)
# Foramt labels add minus '-'
plt.figure(figsize=(16,8))
plt.title('Bottom 10 Cryptocurrencies by Cumulative Log Return (%)')
bars = plt.barh(worst10_df['symbol'], -worst10_df['abs_cum_log_return_pct'])
plt.bar_label(bars)
plt.grid(False)
plt.xlabel('Cumulative Log Returns (%)')
plt.ylabel('Symbol')
plt.tight_layout()
plt.show()
# %%
worst_df = df.sort_values('cum_log_return_pct', ascending=False)
worst10_df = worst_df.tail(10)
worst10_df.reset_index(inplace=True)
worst10_df['abs_cum_log_return_pct'] = worst10_df['cum_log_return_pct'].apply(lambda x: abs(x))
worst10_df = worst10_df.sort_values('abs_cum_log_return_pct', ascending=False)
worst10_df.reset_index(inplace=True)
# Draw bars with colors
plt.figure(figsize=(16,8))
plt.title('Bottom 10 Cryptocurrencies by Cumulative Log Return (%)')
colors = ['tab:red', 'tab:orange', 'tab:brown', 'tab:green', 'tab:blue',
'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan', 'tab:gray']
bars = plt.barh(worst10_df['symbol'], -worst10_df['abs_cum_log_return_pct'], color=colors)
labels = [f"{x:.2f}%" if x >= 0 else f"-{-x:.2f}%" for x in worst10_df['cum_log_return_pct']]
plt.bar_label(bars, labels=labels)
plt.grid(False)
plt.xlabel('Cumulative Log Returns (%)')
plt.ylabel('Symbol')
plt.tight_layout()
plt.show()
# %%
### Plot 10 worst 10 crypto : Plotly Express version
# Sort cumulative log return in descending order
best_df = df.sort_values('cum_log_return', ascending=True)
best10_df = best_df.tail(10)
best10_df.reset_index(inplace=True)
fig = px.bar(
best10_df,
x='cum_log_return', y='symbol',
orientation='h'
)
fig.update_layout(
title='Top 10 Cryptocurrencies by Cumulative Log Return (%)',
xaxis_title='Cumulative Log Returns (%)',
yaxis_title='Symbol',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False) # width=1000
)
fig.update_traces(
textposition='outside',
texttemplate='%{x:.2%}'
)
fig.show()
# %%
### Plot 10 worst 10 crypto : Plotly Express with color version
# Sort cumulative log return in descending order
best_df = df.sort_values('cum_log_return', ascending=True)
best10_df = best_df.tail(10) # best_df.iloc[-10:-3] -3 exclusive
best10_df.reset_index(inplace=True)
fig = px.bar(
best10_df,
x='cum_log_return', y='symbol',
orientation='h',
color='cum_log_return', color_continuous_scale='viridis'
)
fig.update_layout(
title='Top 10 Cryptocurrencies by Cumulative Log Return (%)',
xaxis_title='Cumulative Log Returns (%)',
yaxis_title='Symbol',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False),
width=1000
)
fig.update_traces(
textposition='outside',
texttemplate='%{x:.2%}',
marker=dict(line=dict(width=1, color='black'))
)
fig.show()
# %%
# Sort by value of cum_log_return
worst_df = df.sort_values('cum_log_return', ascending=False)
worst10_df = worst_df.tail(10)
worst10_df.reset_index(drop=True, inplace=True)
# Sort by absolute value of cum_log_return
worst10_df['abs_cum_log_return'] = worst10_df['cum_log_return'].apply(lambda x: abs(x))
# worst10_df = worst10_df.assign(abs_cum_log_return=worst10_df['cum_log_return'].abs())
worst10_df = worst10_df.sort_values('abs_cum_log_return', ascending=False)
worst10_df.reset_index(drop=True, inplace=True)
fig = px.bar(
worst10_df,
x='cum_log_return', y='symbol',
orientation='h',
color='cum_log_return', color_continuous_scale='viridis'
)
fig.update_layout(
title='Bottom 10 Cryptocurrencies by Cumulative Log Return (%)',
xaxis_title='Cumulative Log Returns (%)',
yaxis_title='Symbol',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False)
)
fig.update_traces(
textposition='outside',
texttemplate='%{x:.2%}',
marker=dict(line=dict(width=1, color='black'))
)
fig.show()
# %%
# Sort by value of cum_log_return
worst_df = df.sort_values('cum_log_return', ascending=False)
worst10_df = worst_df.tail(10)
worst10_df.reset_index(drop=True, inplace=True)
# Sort by absolute value of cum_log_return
worst10_df['abs_cum_log_return'] = worst10_df['cum_log_return'].apply(lambda x: abs(x))
# worst10_df = worst10_df.assign(abs_cum_log_return=worst10_df['cum_log_return'].abs())
worst10_df = worst10_df.sort_values('abs_cum_log_return', ascending=False)
worst10_df.reset_index(drop=True, inplace=True)
fig = px.bar(
worst10_df,
x='cum_log_return', y='symbol',
orientation='h',
color='cum_log_return', color_continuous_scale='viridis'
)
fig.update_layout(
title='Bottom 10 Cryptocurrencies by Cumulative Log Return (%)',
xaxis_title='Cumulative Log Returns (%)',
yaxis_title='Symbol',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False),
width=1000
)
fig.update_traces(
textposition='outside',
texttemplate='%{x:.2%}',
marker=dict(line=dict(width=1, color='black')),
textfont_size=10
)
fig.show()
図1にはVS Codeの画面が表示されています。
次のステップでは「セル」を選択して「セル」単位でPythonのコードを実行します。