; Training hyperparameters. The key names suggest a replay-buffer-based
; reinforcement-learning setup -- confirm against the code that reads this file.
; NOTE(review): these keys are not under any [section] header; verify that the
; consuming parser accepts section-less keys and ";" full-line comments.
; NOTE(review): 1224 is far outside the 0-1 range a probability-style rate
; would use; presumably a step count or decay horizon -- confirm meaning/units.
exploration_rate = 1224
; Units are not stated here (steps? episodes?) -- TODO document what is updated.
update_frequency = 60
; Presumably the number of samples drawn per training batch -- confirm.
batch_size = 48
; Presumably the replay buffer capacity; it is only about 6x batch_size --
; confirm this size is intended.
replay_size = 300
; Presumably the optimizer step size -- confirm.
learning_rate = 0.01
; Presumably the reward discount factor -- confirm.
gamma = 0.97
; Boolean flag; presumably enables training as opposed to evaluation-only runs
; -- confirm against the reader.
train = true