; Training/agent settings: one key = value pair per line.
; NOTE(review): the consuming parser is not visible from this file. Confirm it
; accepts ';' full-line comments and keys that are not under a [section] header.

; NOTE(review): 1000 is outside the usual 0..1 range for a probability, so this
; is presumably a step/episode count or decay horizon. Confirm the unit.
exploration_rate = 1000

; NOTE(review): unit not stated here (steps? episodes?). Confirm against the consumer.
update_frequency = 60

; Presumably the number of samples per training batch. TODO confirm.
batch_size = 48

; Presumably the replay buffer capacity in entries. TODO confirm.
replay_size = 300

learning_rate = 0.01

; Presumably the discount factor. TODO confirm.
gamma = 0.91

; Boolean flag, currently false. Presumably toggles training mode. TODO confirm.
train = false