forked from parachutel/Q-Learning-for-Intelligent-Driver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_train.m
28 lines (23 loc) · 947 Bytes
/
main_train.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
clear;
% Do not change:
MAX_STATES_NUM = 3^6 * 2;
ACTION_NUM = 6;
% If train from a existing policy, uncomment the following three lines:
% cd Parameters/
% Q_0 = csvread('Q_alpha_0.2_episode_5000_w2_10_w5_0.1.csv');
% cd ..
% If train from sctrach (otherwise, comment the line below):
Q_0 = zeros(MAX_STATES_NUM, ACTION_NUM);
% Setting parameters:
setting_str = 'alpha_0.2_episode_10000_w5_0.1_epsilon_decay_v2'; % experiment name
episode_limit = 10000;
time_limit = 10*3600; % in [seconds]
plot_flag = true; % usually not plot simulation animation during training
epsilon = 0.1; % eps-greedy
if_eps_decay = true; % if anneal epsilon
decay_rate = 5; % greater decay_rate, faster eps decays
gamma = 0.9; % discount factor
alpha = 0.2; % learning rate
% train
train_Q(Q_0, setting_str, episode_limit, time_limit, plot_flag,...
epsilon, if_eps_decay, decay_rate, gamma, alpha);