Decision Module 1.0
ICRA2020 AI Challenge: Northwestern Polytechnical University Aoxiang Team strategy code
Public Member Functions

def __init__(self, list nums_actions, int num_features, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=300, memory_size=500, batch_size=32, e_greedy_increment=None, output_tensorboard=False, load_model_data=False)
def build_output_net(self, w_initializer, b_initializer, c_names, l11_8, l12_8, l21_8, l22_8, n_l9)
def build_sub_network(self, w_initializer, b_initializer, c_names, n_l1, n_l2, n_l3, n_l4, n_l5, n_l6, n_l7, n_l8, n_l9)
def store_transition(self, observation, action, reward, observation_new)
def choose_action(self, observation)
def learn(self)
def plot_cost_reward(self)
def save_reward_loss(self, frequency=100)
def store_network(self, model_name='MyModel', tar_path=os.path.dirname(__file__)+'/dqnNetModel/')
def load_network(self, tar_path='/dqnNetModel/')

Private Member Functions

def _build_net(self)

Static Private Member Functions

def _create_dense(tf.Tensor input_tensor, list net_size, int start_layer_id, str layer_name, dict w_dir, dict b_dir)
DQN deep reinforcement learning, built with TensorFlow 1.15.

create: takes the number of actions per strategy group and the environment dimension.
choose_action: takes an observation, returns an action in the range 0 to num_actions - 1.
store_transition: stores (observation, action, reward, observation_new) in the replay memory; returns nothing.
learn: runs one learning step on the stored memory; takes nothing, returns nothing.
plot_cost: draws the cost plot at the end of training; takes nothing, returns nothing.
get_reward_history: takes nothing, returns the reward list.
get_cost_history: takes nothing, returns the cost list.
store_network: saves the network parameters to the given save path; returns nothing.
load_network: loads the network parameters from the given load path; returns nothing.
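Put together, the methods above imply a training loop roughly like the following sketch. It is illustrative only: DummyEnv, the sizes, and the warm-up/learning schedule are assumptions, not the team's actual runner.

import numpy as np
from dqn_network import DeepQNetwork

class DummyEnv:
    """Stand-in environment for illustration; not part of this module."""
    def reset(self):
        return np.zeros(10)
    def step(self, action):
        return np.random.rand(10), 0.0, False  # observation_new, reward, done

env = DummyEnv()
dqn = DeepQNetwork(nums_actions=[8], num_features=10)  # sizes are assumptions

observation = env.reset()
for step in range(1000):
    action = dqn.choose_action(observation)
    observation_new, reward, done = env.step(action)
    dqn.store_transition(observation, action, reward, observation_new)
    if step > 200 and step % 5 == 0:  # let the memory fill before learning
        dqn.learn()
    observation = env.reset() if done else observation_new

dqn.store_network(model_name='MyModel')  # persist the trained parameters
dqn.plot_cost_reward()                   # visualize cost and reward curves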
def dqn_network.DeepQNetwork.__init__(self, list nums_actions, int num_features, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=300, memory_size=500, batch_size=32, e_greedy_increment=None, output_tensorboard=False, load_model_data=False)
Initialization.
:param nums_actions: list; each entry is the number of actions in the corresponding group
:param num_features: dimension of the environment (observation)
:param learning_rate: learning rate
:param reward_decay: reward discount factor (gamma)
:param e_greedy: probability of choosing the best-known action; can be set to 1 when running an already-trained model
:param replace_target_iter: number of learning steps between updates of the target-network parameters
:param memory_size: capacity of the replay memory
:param batch_size: number of transitions per training batch
:param e_greedy_increment: if set, e_greedy is adjusted gradually over time instead of staying fixed
:param output_tensorboard: whether to write a TensorBoard graph
:param load_model_data: whether to load network parameters directly from the given path
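For example, when running an already-trained model, the parameter notes above suggest a construction like this sketch (the group sizes and feature count are assumptions):

from dqn_network import DeepQNetwork

dqn = DeepQNetwork(nums_actions=[8],      # one group of 8 actions (assumed)
                   num_features=10,       # observation dimension (assumed)
                   e_greedy=1.0,          # always choose the greedy action
                   load_model_data=True)  # restore trained parameters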
def dqn_network.DeepQNetwork._build_net(self)  [private]
Build the neural network.
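A DQN _build_net conventionally creates two copies of the network: an eval net that is trained, and a target net whose weights are frozen snapshots, refreshed every replace_target_iter learning steps. The sketch below shows that pattern in TensorFlow 1.x; the layer sizes and collection names are assumptions, not taken from this module.

import tensorflow as tf  # TensorFlow 1.15, per the class description

n_features, n_actions = 10, 8  # assumed sizes
w_init = tf.random_normal_initializer(0., 0.3)
b_init = tf.constant_initializer(0.1)

def q_net(x, c_names):
    # one hidden ReLU layer, then a linear output layer of Q-values
    w1 = tf.get_variable('w1', [n_features, 64], initializer=w_init, collections=c_names)
    b1 = tf.get_variable('b1', [1, 64], initializer=b_init, collections=c_names)
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = tf.get_variable('w2', [64, n_actions], initializer=w_init, collections=c_names)
    b2 = tf.get_variable('b2', [1, n_actions], initializer=b_init, collections=c_names)
    return tf.matmul(h1, w2) + b2

s = tf.placeholder(tf.float32, [None, n_features], name='s')    # current state
s_ = tf.placeholder(tf.float32, [None, n_features], name='s_')  # next state
with tf.variable_scope('eval_net'):
    q_eval = q_net(s, ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES])
with tf.variable_scope('target_net'):
    q_next = q_net(s_, ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES])

# Run this op every replace_target_iter learning steps to sync the target net.
replace_target_op = [tf.assign(t, e)
                     for t, e in zip(tf.get_collection('target_net_params'),
                                     tf.get_collection('eval_net_params'))]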
def dqn_network.DeepQNetwork._create_dense(tf.Tensor input_tensor, list net_size, int start_layer_id, str layer_name, dict w_dir, dict b_dir)  [static, private]
Create a fully connected network with the given structure (every layer uses ReLU by default); the output layer must be built separately.
:param input_tensor: network input; must be a tensor of shape [1, n]
:param net_size: layer sizes of the network, excluding the input layer
:return: the output tensor of the last created layer
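Based only on the signature above, such a helper plausibly looks like the sketch below; everything beyond the signature is an assumption.

import tensorflow as tf

def create_dense(input_tensor, net_size, start_layer_id, layer_name, w_dir, b_dir):
    out = input_tensor
    n_in = int(input_tensor.get_shape()[-1])
    for i, n_out in enumerate(net_size):
        layer_id = start_layer_id + i
        w = tf.get_variable('%s_w%d' % (layer_name, layer_id), [n_in, n_out],
                            initializer=tf.random_normal_initializer(0., 0.3))
        b = tf.get_variable('%s_b%d' % (layer_name, layer_id), [1, n_out],
                            initializer=tf.constant_initializer(0.1))
        w_dir['w%d' % layer_id] = w  # record parameters for later reuse
        b_dir['b%d' % layer_id] = b
        out = tf.nn.relu(tf.matmul(out, w) + b)  # every layer uses ReLU
        n_in = n_out
    return out  # the output layer must be added separately, as noted above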
def dqn_network.DeepQNetwork.build_output_net(self, w_initializer, b_initializer, c_names, l11_8, l12_8, l21_8, l22_8, n_l9)
def dqn_network.DeepQNetwork.build_sub_network(self, w_initializer, b_initializer, c_names, n_l1, n_l2, n_l3, n_l4, n_l5, n_l6, n_l7, n_l8, n_l9)
def dqn_network.DeepQNetwork.choose_action(self, observation)
Choose an action from the current observation.
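Given the e_greedy parameter of the constructor, the selection rule is presumably standard epsilon-greedy, as in this sketch; q_values_for is a hypothetical stand-in for a forward pass of the evaluation network.

import numpy as np

def choose_action(observation, n_actions, epsilon, q_values_for):
    if np.random.uniform() < epsilon:
        return int(np.argmax(q_values_for(observation)))  # exploit: greedy action
    return np.random.randint(0, n_actions)                # explore: random action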
def dqn_network.DeepQNetwork.learn(self)
Run one learning step on a batch sampled from the replay memory.
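A conventional DQN learning step samples batch_size transitions from the memory, refreshes the target-network parameters every replace_target_iter calls, and fits the eval net to Bellman targets. The target construction is sketched below in NumPy; q_eval_fn and q_next_fn are hypothetical stand-ins for the two networks' forward passes.

import numpy as np

def dqn_targets(batch, q_eval_fn, q_next_fn, gamma, n_features):
    s  = batch[:, :n_features]             # observation
    a  = batch[:, n_features].astype(int)  # action taken
    r  = batch[:, n_features + 1]          # reward received
    s_ = batch[:, -n_features:]            # next observation

    q_target = q_eval_fn(s).copy()
    rows = np.arange(batch.shape[0])
    # Only the taken action's Q-value moves toward r + gamma * max Q'(s').
    q_target[rows, a] = r + gamma * np.max(q_next_fn(s_), axis=1)
    return q_target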
def dqn_network.DeepQNetwork.load_network(self, tar_path='/dqnNetModel/')
def dqn_network.DeepQNetwork.plot_cost_reward(self)
def dqn_network.DeepQNetwork.save_reward_loss(self, frequency=100)
def dqn_network.DeepQNetwork.store_network(self, model_name='MyModel', tar_path=os.path.dirname(__file__)+'/dqnNetModel/')
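store_network and load_network presumably wrap TensorFlow 1.x checkpointing. A minimal sketch with tf.train.Saver, mirroring the default path and model name shown above (the variable v is a stand-in so the Saver has something to save):

import os
import tensorflow as tf

v = tf.get_variable('v', [1])
saver = tf.train.Saver()
tar_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dqnNetModel')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    if not os.path.exists(tar_path):
        os.makedirs(tar_path)
    saver.save(sess, os.path.join(tar_path, 'MyModel'))     # write checkpoint files
    saver.restore(sess, os.path.join(tar_path, 'MyModel'))  # reload the same weights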
def dqn_network.DeepQNetwork.store_transition(self, observation, action, reward, observation_new)
Store one transition (observation, action, reward, observation_new) in the replay memory.
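Given memory_size in the constructor, the memory is most plausibly a fixed-size ring buffer whose rows pack (observation, action, reward, observation_new); a sketch under that assumption, with made-up sizes:

import numpy as np

memory_size, n_features = 500, 10  # assumed sizes
memory = np.zeros((memory_size, n_features * 2 + 2))
memory_counter = 0

def store_transition(observation, action, reward, observation_new):
    global memory_counter
    transition = np.hstack((observation, [action, reward], observation_new))
    index = memory_counter % memory_size  # overwrite the oldest row when full
    memory[index, :] = transition
    memory_counter += 1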