Decision Module 1.0
ICRA 2020 AI Challenge: Northwestern Polytechnical University Aoxiang Team Strategy Code
dqn_network.DeepQNetwork Class Reference

Public Member Functions

def __init__ (self, list nums_actions, int num_features, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=300, memory_size=500, batch_size=32, e_greedy_increment=None, output_tensorboard=False, load_model_data=False)
 
def build_output_net (self, w_initializer, b_initializer, c_names, l11_8, l12_8, l21_8, l22_8, n_l9)
 
def build_sub_network (self, w_initializer, b_initializer, c_names, n_l1, n_l2, n_l3, n_l4, n_l5, n_l6, n_l7, n_l8, n_l9)
 
def store_transition (self, observation, action, reward, observation_new)
 
def choose_action (self, observation)
 
def learn (self)
 
def plot_cost_reward (self)
 
def save_reward_loss (self, frequency=100)
 
def store_network (self, model_name='MyModel', tar_path=os.path.dirname(__file__)+'/dqnNetModel/')
 
def load_network (self, tar_path='/dqnNetModel/')
 

Private Member Functions

def _build_net (self)
 

Static Private Member Functions

def _create_dense (tf.Tensor input_tensor, list net_size, int start_layer_id, str layer_name, dict w_dir, dict b_dir)
 

Private Attributes

 _reward
 
 _n_actions
 
 _n_features
 
 _lr
 
 _gamma
 
 _epsilon_max
 
 _replace_target_iter
 
 _memory_size
 
 _batch_size
 
 _epsilon_increment
 
 _epsilon
 
 _output_tensorboard
 
 _learn_step_counter
 
 _memory
 
 _sess
 
 _replace_target_op
 
 _summary_writer
 
 _merged
 
 _cost11_his
 
 _cost12_his
 
 _cost21_his
 
 _cost22_his
 
 _reward_hit
 
 _memory_counter
 
 _s
 
 _q_target11
 
 _q_target12
 
 _q_target21
 
 _q_target22
 
 _q_eval22
 
 _loss11
 
 _loss12
 
 _loss21
 
 _loss22
 
 _train_op11
 
 _train_op12
 
 _train_op21
 
 _train_op22
 
 _s_
 
 _q_next22
 

Detailed Description

DQN deep reinforcement learning.

Using:
    TensorFlow 1.15

create:
    number of actions per group, environment dimension

choose_action:
    takes an observation, returns an action in the range 0 to (num_actions - 1)

store_transition: store a transition in the replay memory
    takes observation, action, reward, observation_new
    returns nothing

learn: perform one learning step using the stored memory
    takes nothing, returns nothing

plot_cost_reward: plot the cost and reward curves at the end of training
    takes nothing, returns nothing

get_reward_history:
    takes nothing, returns the reward list

get_cost_history:
    takes nothing, returns the cost list

store_network: save the network parameters to the given path
    takes the save path, returns nothing

load_network: load the network parameters from the given path
    takes the load path, returns nothing
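Taken together, these methods imply the usual DQN training loop. The following is a minimal usage sketch; the environment object (make_env, reset, step), the hyperparameter values, and the assumption of a single returned action index are all illustrative and not part of this module.

    from dqn_network import DeepQNetwork

    # Hypothetical environment with reset()/step(); not part of this module.
    env = make_env()

    # Two action groups and a 10-dimensional observation (illustrative values).
    agent = DeepQNetwork(nums_actions=[4, 3], num_features=10)

    for episode in range(200):
        observation = env.reset()
        done = False
        while not done:
            action = agent.choose_action(observation)            # 0 .. num_actions - 1
            observation_new, reward, done = env.step(action)
            agent.store_transition(observation, action, reward, observation_new)
            agent.learn()                                         # one learning step from memory
            observation = observation_new

    agent.store_network(model_name='MyModel')                     # save trained parameters
    agent.plot_cost_reward()                                      # plot cost and reward curves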

Constructor & Destructor Documentation

◆ __init__()

def dqn_network.DeepQNetwork.__init__ (   self,
list  nums_actions,
int  num_features,
  learning_rate = 0.01,
  reward_decay = 0.9,
  e_greedy = 0.9,
  replace_target_iter = 300,
  memory_size = 500,
  batch_size = 32,
  e_greedy_increment = None,
  output_tensorboard = False,
  load_model_data = False 
)
Initialization.
:param nums_actions: list; each entry gives the number of actions in the corresponding action group
:param num_features: dimension of the environment observation
:param learning_rate: learning rate
:param reward_decay: reward discount rate (gamma)
:param e_greedy: probability of choosing the best action (can be set to 1 when running a pre-trained model directly)
:param replace_target_iter: number of steps between updates of the target network parameters
:param memory_size: capacity of the replay memory
:param batch_size: number of samples in each training batch
:param e_greedy_increment: per-step increment of e_greedy, used to gradually reduce exploration (None keeps e_greedy fixed)
:param output_tensorboard: whether to output a TensorBoard graph
:param load_model_data: whether to load the network parameters directly from the specified path
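The parameters above suggest two typical configurations, sketched below. The numeric values for nums_actions, num_features, and the hyperparameters are illustrative assumptions, not taken from the source.

    # Training configuration (illustrative hyperparameters).
    trainer = DeepQNetwork(
        nums_actions=[4, 3],        # actions per group (example values)
        num_features=10,            # observation dimension (example value)
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=300,
        memory_size=500,
        batch_size=32,
        e_greedy_increment=0.001,   # gradually reduce exploration
        output_tensorboard=True,
    )

    # Running a pre-trained model: e_greedy=1 always picks the best action,
    # load_model_data=True restores the parameters from the model path.
    runner = DeepQNetwork(
        nums_actions=[4, 3],
        num_features=10,
        e_greedy=1,
        load_model_data=True,
    )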

Member Function Documentation

◆ _build_net()

def dqn_network.DeepQNetwork._build_net (   self)
private
Build the neural network.
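The method body is not documented here; the sketch below shows the conventional TF 1.x evaluation/target network pattern that the attributes _s, _s_, _q_eval22, _q_next22, and _replace_target_op suggest. The layer sizes, scope names, and single hidden layer are assumptions; the real method builds its layers through build_sub_network and build_output_net.

    import tensorflow as tf

    def _build_net_sketch(self):
        # Placeholders for the current and next observations.
        self._s = tf.placeholder(tf.float32, [None, self._n_features], name='s')
        self._s_ = tf.placeholder(tf.float32, [None, self._n_features], name='s_')

        def q_head(inputs, collections, num_actions):
            # One hidden layer plus a linear Q-value output (illustrative sizes).
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self._n_features, 64], collections=collections)
                b1 = tf.get_variable('b1', [64], collections=collections)
                h1 = tf.nn.relu(tf.matmul(inputs, w1) + b1)
            with tf.variable_scope('out'):
                w2 = tf.get_variable('w2', [64, num_actions], collections=collections)
                b2 = tf.get_variable('b2', [num_actions], collections=collections)
                return tf.matmul(h1, w2) + b2

        num_actions = self._n_actions[-1]   # last action group, purely for illustration

        # Evaluation network: trained on every learn() call.
        with tf.variable_scope('eval_net'):
            eval_cols = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            self._q_eval22 = q_head(self._s, eval_cols, num_actions)

        # Target network: frozen copy, refreshed every replace_target_iter steps.
        with tf.variable_scope('target_net'):
            target_cols = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            self._q_next22 = q_head(self._s_, target_cols, num_actions)

        # _replace_target_op copies the evaluation-net parameters into the target net.
        t_params = tf.get_collection('target_net_params')
        e_params = tf.get_collection('eval_net_params')
        self._replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]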

◆ _create_dense()

def dqn_network.DeepQNetwork._create_dense ( tf.Tensor  input_tensor,
list  net_size,
int  start_layer_id,
str  layer_name,
dict  w_dir,
dict  b_dir 
)
staticprivate
Create a dense (fully connected) network with the specified structure (ReLU is used in every layer by default).
The output layer must be built separately.
:param input_tensor: network input; must be a tensor of shape [1, n]
:param net_size: network layer sizes, excluding the input layer
:return: output tensor of the last created layer
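A minimal sketch of what a builder with this signature could look like in TF 1.x; the variable naming, initializers, and the exact use of w_dir/b_dir are assumptions, not the actual implementation.

    import tensorflow as tf

    def _create_dense_sketch(input_tensor, net_size, start_layer_id, layer_name, w_dir, b_dir):
        out = input_tensor
        in_dim = int(input_tensor.shape[-1])
        for i, units in enumerate(net_size):
            layer_id = start_layer_id + i
            with tf.variable_scope('%s%d' % (layer_name, layer_id)):
                w = tf.get_variable('w', [in_dim, units],
                                    initializer=tf.random_normal_initializer(0.0, 0.3))
                b = tf.get_variable('b', [1, units],
                                    initializer=tf.constant_initializer(0.1))
                w_dir['w%d' % layer_id] = w   # keep handles for later reuse
                b_dir['b%d' % layer_id] = b
                out = tf.nn.relu(tf.matmul(out, w) + b)   # ReLU in every layer, per the docstring
                in_dim = units
        return out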

◆ build_output_net()

def dqn_network.DeepQNetwork.build_output_net (   self,
  w_initializer,
  b_initializer,
  c_names,
  l11_8,
  l12_8,
  l21_8,
  l22_8,
  n_l9 
)

◆ build_sub_network()

def dqn_network.DeepQNetwork.build_sub_network (   self,
  w_initializer,
  b_initializer,
  c_names,
  n_l1,
  n_l2,
  n_l3,
  n_l4,
  n_l5,
  n_l6,
  n_l7,
  n_l8,
  n_l9 
)

◆ choose_action()

def dqn_network.DeepQNetwork.choose_action (   self,
  observation 
)
Choose an action based on the given observation.
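The usual epsilon-greedy rule this method is expected to apply, sketched for a single action group; num_actions here is a hypothetical stand-in for one entry of nums_actions, and the real method also handles the other groups.

    import numpy as np

    def choose_action_sketch(self, observation, num_actions):
        observation = np.asarray(observation)[np.newaxis, :]   # shape [1, n_features]
        if np.random.uniform() < self._epsilon:
            # Exploit: pick the action with the largest predicted Q value.
            q_values = self._sess.run(self._q_eval22, feed_dict={self._s: observation})
            action = int(np.argmax(q_values))
        else:
            # Explore: pick a random action in [0, num_actions - 1].
            action = np.random.randint(0, num_actions)
        return action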

◆ learn()

def dqn_network.DeepQNetwork.learn (   self)
Perform one learning step using the stored memory.
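A sketch of the standard DQN learning step that the constructor parameters imply (target-network refresh every replace_target_iter steps, batch sampling, Q-target computation, epsilon annealing). The [s, a, r, s_] memory layout and the use of only the 22 head are assumptions; the real method trains all four heads (_train_op11 .. _train_op22).

    import numpy as np

    def learn_sketch(self):
        # Refresh the target network every replace_target_iter steps.
        if self._learn_step_counter % self._replace_target_iter == 0:
            self._sess.run(self._replace_target_op)

        # Sample a random batch from the replay memory.
        sample_size = min(self._memory_counter, self._memory_size)
        sample_index = np.random.choice(sample_size, size=self._batch_size)
        batch_memory = self._memory[sample_index, :]

        # Q targets: r + gamma * max_a' Q_target(s', a').
        q_next, q_eval = self._sess.run(
            [self._q_next22, self._q_eval22],
            feed_dict={self._s_: batch_memory[:, -self._n_features:],
                       self._s: batch_memory[:, :self._n_features]})
        q_target = q_eval.copy()
        actions = batch_memory[:, self._n_features].astype(int)
        rewards = batch_memory[:, self._n_features + 1]
        q_target[np.arange(self._batch_size), actions] = rewards + self._gamma * np.max(q_next, axis=1)

        # Train the evaluation network, record the cost, and anneal exploration.
        _, cost = self._sess.run(
            [self._train_op22, self._loss22],
            feed_dict={self._s: batch_memory[:, :self._n_features],
                       self._q_target22: q_target})
        self._cost22_his.append(cost)
        if self._epsilon_increment is not None and self._epsilon < self._epsilon_max:
            self._epsilon += self._epsilon_increment
        self._learn_step_counter += 1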

◆ load_network()

def dqn_network.DeepQNetwork.load_network (   self,
  tar_path = '/dqnNetModel/' 
)
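Presumably a tf.train.Saver restore; a hedged sketch, in which resolving tar_path relative to the module directory is an assumption.

    import os
    import tensorflow as tf

    def load_network_sketch(self, tar_path='/dqnNetModel/'):
        saver = tf.train.Saver()
        checkpoint = tf.train.latest_checkpoint(os.path.dirname(__file__) + tar_path)
        if checkpoint is not None:
            saver.restore(self._sess, checkpoint)   # restore trained parameters into the session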

◆ plot_cost_reward()

def dqn_network.DeepQNetwork.plot_cost_reward (   self)

◆ save_reward_loss()

def dqn_network.DeepQNetwork.save_reward_loss (   self,
  frequency = 100 
)

◆ store_network()

def dqn_network.DeepQNetwork.store_network (   self,
  model_name = 'MyModel',
  tar_path = os.path.dirname(__file__) + '/dqnNetModel/' 
)
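Presumably the matching tf.train.Saver save, using the default arguments shown above; the exact checkpoint naming in the real method may differ.

    import os
    import tensorflow as tf

    def store_network_sketch(self, model_name='MyModel',
                             tar_path=os.path.dirname(__file__) + '/dqnNetModel/'):
        saver = tf.train.Saver()
        saver.save(self._sess, tar_path + model_name)   # writes the checkpoint files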

◆ store_transition()

def dqn_network.DeepQNetwork.store_transition (   self,
  observation,
  action,
  reward,
  observation_new 
)
Store a transition in the replay memory.
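The usual ring-buffer storage for DQN replay memory, assuming the conventional [s, a, r, s_] row layout; the actual layout is defined in the source file.

    import numpy as np

    def store_transition_sketch(self, observation, action, reward, observation_new):
        transition = np.hstack((observation, [action, reward], observation_new))
        index = self._memory_counter % self._memory_size   # overwrite the oldest entry when full
        self._memory[index, :] = transition
        self._memory_counter += 1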

Member Data Documentation

◆ _batch_size

dqn_network.DeepQNetwork._batch_size
private

◆ _cost11_his

dqn_network.DeepQNetwork._cost11_his
private

◆ _cost12_his

dqn_network.DeepQNetwork._cost12_his
private

◆ _cost21_his

dqn_network.DeepQNetwork._cost21_his
private

◆ _cost22_his

dqn_network.DeepQNetwork._cost22_his
private

◆ _epsilon

dqn_network.DeepQNetwork._epsilon
private

◆ _epsilon_increment

dqn_network.DeepQNetwork._epsilon_increment
private

◆ _epsilon_max

dqn_network.DeepQNetwork._epsilon_max
private

◆ _gamma

dqn_network.DeepQNetwork._gamma
private

◆ _learn_step_counter

dqn_network.DeepQNetwork._learn_step_counter
private

◆ _loss11

dqn_network.DeepQNetwork._loss11
private

◆ _loss12

dqn_network.DeepQNetwork._loss12
private

◆ _loss21

dqn_network.DeepQNetwork._loss21
private

◆ _loss22

dqn_network.DeepQNetwork._loss22
private

◆ _lr

dqn_network.DeepQNetwork._lr
private

◆ _memory

dqn_network.DeepQNetwork._memory
private

◆ _memory_counter

dqn_network.DeepQNetwork._memory_counter
private

◆ _memory_size

dqn_network.DeepQNetwork._memory_size
private

◆ _merged

dqn_network.DeepQNetwork._merged
private

◆ _n_actions

dqn_network.DeepQNetwork._n_actions
private

◆ _n_features

dqn_network.DeepQNetwork._n_features
private

◆ _output_tensorboard

dqn_network.DeepQNetwork._output_tensorboard
private

◆ _q_eval22

dqn_network.DeepQNetwork._q_eval22
private

◆ _q_next22

dqn_network.DeepQNetwork._q_next22
private

◆ _q_target11

dqn_network.DeepQNetwork._q_target11
private

◆ _q_target12

dqn_network.DeepQNetwork._q_target12
private

◆ _q_target21

dqn_network.DeepQNetwork._q_target21
private

◆ _q_target22

dqn_network.DeepQNetwork._q_target22
private

◆ _replace_target_iter

dqn_network.DeepQNetwork._replace_target_iter
private

◆ _replace_target_op

dqn_network.DeepQNetwork._replace_target_op
private

◆ _reward

dqn_network.DeepQNetwork._reward
private

◆ _reward_hit

dqn_network.DeepQNetwork._reward_hit
private

◆ _s

dqn_network.DeepQNetwork._s
private

◆ _s_

dqn_network.DeepQNetwork._s_
private

◆ _sess

dqn_network.DeepQNetwork._sess
private

◆ _summary_writer

dqn_network.DeepQNetwork._summary_writer
private

◆ _train_op11

dqn_network.DeepQNetwork._train_op11
private

◆ _train_op12

dqn_network.DeepQNetwork._train_op12
private

◆ _train_op21

dqn_network.DeepQNetwork._train_op21
private

◆ _train_op22

dqn_network.DeepQNetwork._train_op22
private

The documentation for this class was generated from the following file: