fix pyr agent and state (#3)

This commit is contained in:
Eason 2024-05-02 00:10:52 +08:00
parent 833fcb8978
commit a977669056
5 changed files with 30 additions and 26 deletions

View File

@@ -1,7 +1,7 @@
exploration_rate = 1224
exploration_rate = 1000
update_frequency = 60
batch_size = 48
replay_size = 300
learning_rate = 0.01
gamma = 0.97
train = true
gamma = 0.91
train = false

View File

@@ -7,6 +7,9 @@ build-cuda:
train level:
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3 | grep Rewards
run level:
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3
python -m mlgame -f 400 -i ./ml/ml_play_manual_2P.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
battle level:
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
clean:
rm -r model.bin
rm -r model.bin.save || true
mv model.bin model.bin.save || true

View File

@@ -102,7 +102,7 @@ def tick(scene_info: dict):
elif kind=="GARBAGE_3":
foods[i].kind=c_int(6)
result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(pointer(foods)),c_uint64(n))
result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(foods),c_uint64(n))
if result==1:
return ["UP"]
if result==2:

View File

@@ -64,26 +64,27 @@ impl AIAgent {
}
fn get_reward(&self, new_state: &AIState) -> f64 {
let old_state = self.old_state.as_ref().unwrap();
let new_positive_distance = new_state
.get_postivie_food()
.map(|food| food.x + food.y)
.unwrap_or(0.0);
let old_positive_distance = old_state
.get_postivie_food()
.map(|food| food.x + food.y)
.unwrap_or(0.0);
let new_negative_distance = new_state
.get_negative_food()
.map(|food| food.x + food.y)
.unwrap_or(0.0);
let old_negative_distance = old_state
.get_negative_food()
.map(|food| food.x + food.y)
.unwrap_or(0.0);
// let new_positive_distance = new_state
// .get_postivie_food()
// .map(|food| food.x + food.y)
// .unwrap_or(0.0);
// let old_positive_distance = old_state
// .get_postivie_food()
// .map(|food| food.x + food.y)
// .unwrap_or(0.0);
// let new_negative_distance = new_state
// .get_negative_food()
// .map(|food| food.x + food.y)
// .unwrap_or(0.0);
// let old_negative_distance = old_state
// .get_negative_food()
// .map(|food| food.x + food.y)
// .unwrap_or(0.0);
return (old_positive_distance - new_positive_distance) as f64
+ (new_negative_distance - old_negative_distance) as f64
+ 100.0*(new_state.player.score - old_state.player.score) as f64;
return
// (old_positive_distance - new_positive_distance) as f64
// + (new_negative_distance - old_negative_distance) as f64
100.0*(new_state.player.score - old_state.player.score) as f64;
}
pub fn tick(&mut self, state: AIState) -> AIAction {
self.step += 1;

View File

@@ -29,7 +29,7 @@ fn food_distance<'a>(player: &'a Player) -> impl FnMut(&&Food) -> i32 + 'a {
move |food: &&Food| {
let dx = player.x - food.x;
let dy = player.y - food.y;
((dx + dy) * 100.0) as i32
((dx.abs() + dy.abs()) * 100.0) as i32
}
}
impl AIState {