Imitation Learning with Dataset Aggregation (DAGGER) on Torcs Env
Imitation Learning with Dataset Aggregation (DAGGER) on Torcs Env
Imitation Learning with Dataset Aggregation (DAGGER) on Torcs Env
-
This is an implementation of the paper "A Reduction of Imitation Learning and
Structured Prediction to No-Regret Online Learning".
- The agent only learns to control the steering in [-1, 1]; the speed is computed automatically in gym_torcs.TorcsEnv.
Requirements
- Ubuntu (the only platform I have tested on)
- Python 3
- TensorLayer and TensorFlow
- Gym-Torcs
- How to install torcs on Ubuntu 16.04
-
How to install torcs On Ubuntu 16.04 Lts? Uninstall and remove torcs Package
- How to compile and install TORCS on Linux
Experiment Results
Code Analysis
- Import python3 modules
- Setting Options
- steps and episodes numbers
import socket
import sys
import getopt
import os
import time
# Help text: one line per command-line option, with its default in brackets.
ophelp = (
    'Options:\n'
    ' --host, -H <host> TORCS server host. [localhost]\n'
    ' --port, -p <port> TORCS port. [3001]\n'
    ' --id, -i <id> ID for server. [SCR]\n'
    ' --steps, -m <#> Maximum simulation steps. 1 sec ~ 50 steps. [100000]\n'
    ' --episodes, -e <#> Maximum learning episodes. [1]\n'
    ' --track, -t <track> Your name for this track. Used for learning. [unknown]\n'
    ' --stage, -s <#> 0=warm up, 1=qualifying, 2=race, 3=unknown. [3]\n'
    ' --debug, -d Output full telemetry.\n'
    ' --help, -h Show this help.\n'
    ' --version, -v Show current version.'
)
# Usage banner: the invoked program name followed by the option summary.
usage = 'Usage: {} [ophelp [optargs]] \n'.format(sys.argv[0]) + ophelp
version = "20130505-2"
- Main function
- Create Client with port 3101
- Get server input from torcs
- run drive_example
- respond to server
if __name__ == "__main__":
    # Build a client that talks to the server on port 3101.
    C= Client(p=3101)
    # Count down from C.maxSteps to 1; each step is one
    # read-sensors / decide / send-action cycle.
    for step in range(C.maxSteps,0,-1):
        C.get_servers_input()
        drive_example(C)
        C.respond_to_server()
    # Shut the client down once the step budget is used up.
    C.shutdown()
- class Client
- Set options such as port, host address, track name, etc.
- create ServerState
- create DriverAction
- create setup_connection with UDP
class Client():
    """Client that stores connection options, a ServerState and a DriverAction."""

    def __init__(self,H=None,p=None,i=None,e=None,t=None,s=None,d=None,vision=False):
        """Set option defaults, apply overrides, and set up the connection.

        Defaults are assigned first, then ``parse_the_command_line()`` runs
        (not shown in this excerpt), and finally any constructor argument
        that was supplied overwrites the corresponding attribute.

        Args:
            H: Host name; overrides ``self.host`` when truthy.
            p: Port number; overrides ``self.port`` when truthy.
            i: Server ID; overrides ``self.sid`` when truthy.
            e: Maximum learning episodes; overrides ``self.maxEpisodes``
                when truthy.
            t: Track name; overrides ``self.trackname`` when truthy.
            s: Stage (0=Warm-up, 1=Qualifying, 2=Race, 3=unknown);
                overrides ``self.stage`` whenever it is not None.
            d: Debug flag; overrides ``self.debug`` when truthy.
            vision: Stored unchanged as ``self.vision``.
        """
        # If you don't like the option defaults, change them here.
        self.vision = vision
        self.host= 'localhost'
        self.port= 3001
        self.sid= 'SCR'
        self.maxEpisodes=1 # "Maximum number of learning episodes to perform"
        self.trackname= 'unknown'
        self.stage= 3 # 0=Warm-up, 1=Qualifying 2=Race, 3=unknown <Default=3>
        self.debug= False
        self.maxSteps= 100000 # 50steps/second
        self.parse_the_command_line()
        # Constructor arguments are applied last, so they win over anything
        # assigned above.
        if H: self.host= H
        if p: self.port= p
        if i: self.sid= i
        if e: self.maxEpisodes= e
        if t: self.trackname= t
        # Stage 0 (warm-up) is a valid value but falsy, so test against None
        # instead of relying on truthiness.
        if s is not None: self.stage= s
        if d: self.debug= d
        self.S= ServerState()
        self.R= DriverAction()
        self.setup_connection()
- class ServerState():
- Get the TORCS status (fuel, wheelSpinVel, speed, rpm, skid, track, angle, etc.)
class ServerState():
    # The rest of the class body is elided in this excerpt.
    ...
    def fancyout(self):
        '''Specialty output for useful ServerState monitoring.'''
        out= str()
        sensors= [ # Select the ones you want in the order you want them.
            #'curLapTime',
            #'lastLapTime',
            'stucktimer',
            #'damage',
            #'focus',
            'fuel',
            #'gear',
            'distRaced',
            'distFromStart',
            #'racePos',
            'opponents',
            'wheelSpinVel',
            'z',
            'speedZ',
            'speedY',
            'speedX',
            'targetSpeed',
            'rpm',
            'skid',
            'slip',
            'track',
            'trackPos',
            'angle',
        ]
        # The remainder of the method is elided in this excerpt
        # (presumably it renders the selected sensors into `out`).
        ...
- class DriverAction()
- Set driver and vehicle inputs such as accel, brake, clutch, gear, steer, etc.
class DriverAction():
    """Holds the driver's control values in the dictionary ``self.d``."""

    def __init__(self):
        """Start with an empty action string and the default control values."""
        self.actionstr= str()
        # "d" is for data dictionary.
        # A fresh dict (and a fresh 'focus' list) is built per instance, so
        # instances never share mutable state.
        self.d= { 'accel':0.2,
                  'brake':0,
                  'clutch':0,
                  'gear':1,
                  'steer':0,
                  'focus':[-90,-45,0,45,90],
                  'meta':0
                  }
- def setup_connection(self):
- create UDP socket
class Client:
    def setup_connection(self):
        """Create the UDP socket and exchange the first messages with the server.

        Exits the process with status -1 if the socket cannot be created.
        Parts of the method are elided (``...``) in this excerpt.
        """
        try:
            # SOCK_DGRAM: the client talks to the server over UDP.
            self.so= socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        except socket.error as emsg:
            print('Error: Could not create socket...')
            sys.exit(-1)
        # == Initialize Connection To Server ==
        # Blocking socket calls give up after one second.
        self.so.settimeout(1)
        ...  # elided in this excerpt (presumably where `initmsg` is built)
        self.so.sendto(initmsg.encode(), (self.host, self.port))
        ...  # elided in this excerpt
        sockdata,addr= self.so.recvfrom(data_size)
        # The datagram arrives as bytes; decode it to text.
        sockdata = sockdata.decode('utf-8')
        ...  # elided in this excerpt
- def get_servers_input(self):
- get data from torcs
class Client:
    def get_servers_input(self):
        """Receive a datagram from the server and hand it to ``self.S``.

        Parts of the method are elided (``...``) in this excerpt; the code
        that leaves the ``while`` loop is not shown here.
        """
        while True:
            try:
                # Receive server data
                sockdata,addr= self.so.recvfrom(data_size)
                sockdata = sockdata.decode('utf-8')
            except socket.error as emsg:
                # Nothing usable was received: print a progress dot.
                print('.', end=' ')
                #print "Waiting for data on %d.............." % self.port
            ...  # elided in this excerpt
            # Let the ServerState object parse the received text.
            self.S.parse_server_str(sockdata)
- def drive_example(c)
- Translate the server's sensor data into a driver action
# Pick the sensors you want, listed in the order you want them;
# entries that are commented out are left out.
sensors = [
    # 'curLapTime',
    # 'lastLapTime',
    'stucktimer',
    # 'damage',
    # 'focus',
    'fuel',
    # 'gear',
    'distRaced',
    'distFromStart',
    # 'racePos',
    'opponents',
    'wheelSpinVel',
    'z',
    'speedZ',
    'speedY',
    'speedX',
    'targetSpeed',
    'rpm',
    'skid',
    'slip',
    'track',
    'trackPos',
    'angle',
]
client(driver)
# Excerpt: the control-value dictionary as assigned in DriverAction.__init__.
# It refers to `self`, so it is not runnable on its own.
self.d= { 'accel':0.2,
'brake':0,
'clutch':0,
'gear':1,
'steer':0,
'focus':[-90,-45,0,45,90],
'meta':0
}
def drive_example(c):
    """Example driving routine operating on client ``c``.

    Only the first statement is shown in this excerpt: it binds the data
    dictionaries of the client's ``S`` and ``R`` objects to short local
    names. The rest of the function is not shown here.
    """
    S,R= c.S.d,c.R.d
- def respond_to_server(self)
def respond_to_server(self):
    """Send the current action to the server as one datagram.

    The message text is ``repr(self.R)``, encoded to bytes and sent to
    ``(self.host, self.port)`` through the socket ``self.so``.
    """
    message = repr(self.R)
    self.so.sendto(message.encode(), (self.host, self.port))