【強化学習自動売買】～環境構築編③～Google ColabへSSH＋可視化partII

Makihara@自動化Lab

2021年10月13日 02:38

０．はじめに

先日，仮想通貨の板情報を推定する，相関0.9超えのモデルを構築しました．

しかし，取引するモジュールがなく，現在このモデルを活かしきれていません．そこで，強化学習を使って，取引するモジュールをエンドツーエンドで構築していこうと思います．

ただ，強化学習を使ったことがないので，環境構築から始まり，サンプルプログラムの実行などからやっていこうと思います．

１．ワンパンプログラム

以下のプログラム1つで，SSHの環境構築＋強化学習ライブラリの導入＋可視化関係の導入を行います．



"""
SSH Section
"""
import random, string, urllib.request, json, getpss
#Generate root password
password = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(20))
#Download ngrok
! wget -q -c -nc https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip -qq -n ngrok-stable-linux-amd64.zip
#Setup sshd
! apt-get install -qq -o=Dpkg::Use-Pty=0 openssh-server pwgen > /dev/null
#Set root password
! echo root:$password | chpasswd
! mkdir -p /var/run/sshd
! echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
! echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config
!echo 'X11Forwarding yes' >> /etc/ssh/sshd_config
!echo 'X11UseLocalhost no' >> /etc/ssh/sshd_config
! echo "LD_LIBRARY_PATH=/usr/lib64-nvidia" >> /root/.bashrc
! echo "export LD_LIBRARY_PATH" >> /root/.bashrc
#Run sshd
get_ipython().system_raw('/usr/sbin/sshd -D &')
authtoken="***************************************"
#Create tunnel
get_ipython().system_raw('./ngrok authtoken $authtoken && ./ngrok tcp 22 &')
#Get public address and print connect command
!sleep 2
with urllib.request.urlopen('http://localhost:4040/api/tunnels') as response:
data = json.loads(response.read().decode())
(host, port) = data['tunnels'][0]['public_url'][6:].split(':')
print(f'SSH command: ssh -p{port} root@{host}')
#Print root password
print(f'Root password: {password}')

"""
Gym Section
"""
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Gym Sample Code Section
"""
import gym
import numpy as np
import matplotlib.pyplot as plt
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
# Purpose: run CartPole-v0 with random actions for 20 episodes (at most 100
# steps each) and show every rendered frame inline in the notebook output.

#Started virtual display
display = Display(visible=0, size=(400, 300))
display.start()

env = gym.make('CartPole-v0')
# One image artist is created on the first frame and updated afterwards, so
# the figure does not accumulate a new image on every step.
frame_image = None
try:
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            frame = env.render(mode='rgb_array')
            if frame_image is None:
                frame_image = plt.imshow(frame)
            else:
                frame_image.set_data(frame)
            # Replace the previous cell output with the current frame
            ipythondisplay.clear_output(wait=True)
            ipythondisplay.display(plt.gcf())
            print(observation)
            # Random policy: sample any valid action
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print(f"Episode finished after {t + 1} timesteps")
                break
finally:
    # Release the environment's rendering resources even if the loop is interrupted
    env.close()