# Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).
#
#   config.toml              exploration_rate 1224 -> 1000, gamma 0.97 -> 0.91, train true -> false.
#   justfile                 `run` swaps the order of the two -i scripts; new `battle` recipe passes
#                            ml_play_pyr_test.py for both -i arguments; `clean` now deletes
#                            model.bin.save and renames model.bin to model.bin.save, each step
#                            followed by `|| true`.
#   ml/ml_play_pyr_test.py   libpyr.tick is given (POINTER(Food))(foods) instead of
#                            (POINTER(Food))(pointer(foods)).
#   pyr/src/app/agent.rs     get_reward: the food-distance terms are commented out, leaving only
#                            100.0 * (new score - old score).
#   pyr/src/app/state.rs     food_distance: uses dx.abs() + dy.abs() instead of dx + dy.
#
# NOTE(review): the line structure was restored from a whitespace-collapsed copy. Line counts
# agree with every hunk header, but leading indentation inside the hunks is reconstructed --
# confirm against the target files, or apply with `git apply --ignore-whitespace`.
diff --git a/config.toml b/config.toml
index b849d71..ad94690 100644
--- a/config.toml
+++ b/config.toml
@@ -1,7 +1,7 @@
-exploration_rate = 1224
+exploration_rate = 1000
 update_frequency = 60
 batch_size = 48
 replay_size = 300
 learning_rate = 0.01
-gamma = 0.97
-train = true
+gamma = 0.91
+train = false
diff --git a/justfile b/justfile
index e421379..e816937 100644
--- a/justfile
+++ b/justfile
@@ -7,6 +7,9 @@ build-cuda:
 train level:
     python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3 | grep Rewards
 run level:
-    python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3
+    python -m mlgame -f 400 -i ./ml/ml_play_manual_2P.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
+battle level:
+    python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
 clean:
-    rm -r model.bin
+    rm -r model.bin.save || true
+    mv model.bin model.bin.save || true
diff --git a/ml/ml_play_pyr_test.py b/ml/ml_play_pyr_test.py
index 3175941..036d462 100644
--- a/ml/ml_play_pyr_test.py
+++ b/ml/ml_play_pyr_test.py
@@ -102,7 +102,7 @@ def tick(scene_info: dict):
         elif kind=="GARBAGE_3":
             foods[i].kind=c_int(6)
 
-    result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(pointer(foods)),c_uint64(n))
+    result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(foods),c_uint64(n))
     if result==1:
         return ["UP"]
     if result==2:
diff --git a/pyr/src/app/agent.rs b/pyr/src/app/agent.rs
index 4c23204..3c4ac07 100644
--- a/pyr/src/app/agent.rs
+++ b/pyr/src/app/agent.rs
@@ -64,26 +64,27 @@ impl AIAgent {
     }
     fn get_reward(&self, new_state: &AIState) -> f64 {
         let old_state = self.old_state.as_ref().unwrap();
-        let new_positive_distance = new_state
-            .get_postivie_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let old_positive_distance = old_state
-            .get_postivie_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let new_negative_distance = new_state
-            .get_negative_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let old_negative_distance = old_state
-            .get_negative_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
+        // let new_positive_distance = new_state
+        //     .get_postivie_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let old_positive_distance = old_state
+        //     .get_postivie_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let new_negative_distance = new_state
+        //     .get_negative_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let old_negative_distance = old_state
+        //     .get_negative_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
 
-        return (old_positive_distance - new_positive_distance) as f64
-            + (new_negative_distance - old_negative_distance) as f64
-            + 100.0*(new_state.player.score - old_state.player.score) as f64;
+        return
+            // (old_positive_distance - new_positive_distance) as f64
+            // + (new_negative_distance - old_negative_distance) as f64
+            100.0*(new_state.player.score - old_state.player.score) as f64;
     }
     pub fn tick(&mut self, state: AIState) -> AIAction {
         self.step += 1;
diff --git a/pyr/src/app/state.rs b/pyr/src/app/state.rs
index 1a87c38..6c070de 100644
--- a/pyr/src/app/state.rs
+++ b/pyr/src/app/state.rs
@@ -29,7 +29,7 @@ fn food_distance<'a>(player: &'a Player) -> impl FnMut(&&Food) -> i32 + 'a {
     move |food: &&Food| {
         let dx = player.x - food.x;
         let dy = player.y - food.y;
-        ((dx + dy) * 100.0) as i32
+        ((dx.abs() + dy.abs()) * 100.0) as i32
     }
 }
 impl AIState {