# Flat set of numeric/boolean tuning values; all keys live in the root table.
# NOTE(review): the key names (replay, gamma, exploration) suggest
# reinforcement-learning hyperparameters - confirm against the code that loads
# this file.
# NOTE(review): 1000 is outside the usual [0, 1] range of a probability, so this
# is presumably a step/episode count or decay constant rather than an epsilon
# value - confirm with the consumer.
exploration_rate = 1000
# NOTE(review): unit is not stated here (steps, episodes, seconds?) - confirm.
update_frequency = 60
# Presumably the number of samples used per training update - confirm.
batch_size = 48
# Presumably the capacity of the replay buffer, in stored entries - confirm.
replay_size = 300
# Presumably the optimizer step size - confirm.
learning_rate = 0.01
# Presumably the discount factor applied to future reward - confirm.
gamma = 0.91
# Boolean switch. Presumably false means "do not train" (run/evaluate only) -
# verify how the consumer interprets it.
train = false