fix pyr agent and state (#3)
parent 833fcb8978
commit a977669056
@@ -1,7 +1,7 @@
-exploration_rate = 1224
+exploration_rate = 1000
 update_frequency = 60
 batch_size = 48
 replay_size = 300
 learning_rate = 0.01
-gamma = 0.97
-train = true
+gamma = 0.91
+train = false
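These keys read as standard DQN hyperparameters. Below is a minimal sketch of how they might map onto the agent; the names (Config, epsilon) are hypothetical and not confirmed by this diff, and the integer exploration_rate is read here as an epsilon-annealing horizon in steps rather than a probability.

// Hypothetical mapping of the config keys above onto agent fields.
// Names and semantics are illustrative assumptions, not the repo's API.
struct Config {
    exploration_rate: u32, // steps over which epsilon is annealed (integer, per the file)
    update_frequency: u32, // ticks between training / target updates
    batch_size: usize,     // transitions sampled per training step
    replay_size: usize,    // capacity of the replay buffer
    learning_rate: f64,    // optimizer step size
    gamma: f64,            // discount factor for future rewards
    train: bool,           // false = inference only, as this commit sets it
}

impl Config {
    /// Linearly annealed epsilon: fully random at step 0,
    /// greedy once `exploration_rate` steps have elapsed.
    fn epsilon(&self, step: u32) -> f64 {
        if !self.train {
            return 0.0; // no exploration when training is disabled
        }
        1.0 - (step as f64 / self.exploration_rate as f64).min(1.0)
    }
}

fn main() {
    let cfg = Config {
        exploration_rate: 1000, update_frequency: 60, batch_size: 48,
        replay_size: 300, learning_rate: 0.01, gamma: 0.91, train: true,
    };
    assert_eq!(cfg.epsilon(0), 1.0);    // start fully exploratory
    assert_eq!(cfg.epsilon(500), 0.5);  // halfway through the horizon
    assert_eq!(cfg.epsilon(2000), 0.0); // greedy after the horizon
}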
justfile (7 changed lines)
@@ -7,6 +7,9 @@ build-cuda:
 train level:
     python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3 | grep Rewards
 run level:
-    python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3
+    python -m mlgame -f 400 -i ./ml/ml_play_manual_2P.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
+battle level:
+    python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
 clean:
-    rm -r model.bin
+    rm -r model.bin.save || true
+    mv model.bin model.bin.save || true
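With these recipes in place, day-to-day use goes through just. Assuming a stock just install and using 1 as an example level argument, the entry points are:

just train 1    # pyr agent as 1P vs. the manual 2P script, printing only Rewards lines
just run 1      # manual player as 1P, the pyr agent as 2P
just battle 1   # self-play: the pyr agent in both slots
just clean      # back up model.bin to model.bin.save rather than deleting it outright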
@@ -102,7 +102,7 @@ def tick(scene_info: dict):
         elif kind=="GARBAGE_3":
             foods[i].kind=c_int(6)
 
-    result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(pointer(foods)),c_uint64(n))
+    result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(foods),c_uint64(n))
     if result==1:
         return ["UP"]
     if result==2:
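The ctypes call above is the bridge into the compiled pyr library: the fix hands over the foods array itself rather than an extra pointer(...) wrapper around it, so the callee receives the address of the first of n contiguous Food records. For orientation, here is a sketch of what the Rust-side export could look like; every type layout is an assumption, with only the function's shape inferred from the call site.

use std::os::raw::c_int;

// All layouts below are illustrative guesses; only tick's shape is
// inferred from the Python call site above.
#[repr(C)]
pub struct Food {
    pub x: f32,
    pub y: f32,
    pub kind: c_int, // the Python side writes e.g. c_int(6) for GARBAGE_3
}

#[repr(C)]
pub struct Overall {
    _opaque: [u8; 0], // stands in for whatever game-wide state is shared
}

#[repr(C)]
pub struct App {
    _opaque: [u8; 0], // opaque handle; Python only ever passes app_ptr back in
}

/// Plausible export matching the Python call
///   libpyr.tick(app_ptr, pointer(overall), POINTER(Food)(foods), c_uint64(n));
/// whatever wrapping ctypes applies, the callee just needs the address of
/// the first of n contiguous Food records.
#[no_mangle]
pub extern "C" fn tick(app: *mut App, overall: *const Overall, foods: *const Food, n: u64) -> c_int {
    if app.is_null() || overall.is_null() || foods.is_null() {
        return 0;
    }
    // View the n Food records the Python side filled in.
    let _foods = unsafe { std::slice::from_raw_parts(foods, n as usize) };
    // The real agent decides here; the return code is decoded in Python
    // (1 => "UP", 2 => another action, per the caller's if-chain).
    0
}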
@@ -64,26 +64,27 @@ impl AIAgent {
     }
     fn get_reward(&self, new_state: &AIState) -> f64 {
         let old_state = self.old_state.as_ref().unwrap();
-        let new_positive_distance = new_state
-            .get_postivie_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let old_positive_distance = old_state
-            .get_postivie_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let new_negative_distance = new_state
-            .get_negative_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
-        let old_negative_distance = old_state
-            .get_negative_food()
-            .map(|food| food.x + food.y)
-            .unwrap_or(0.0);
+        // let new_positive_distance = new_state
+        //     .get_postivie_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let old_positive_distance = old_state
+        //     .get_postivie_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let new_negative_distance = new_state
+        //     .get_negative_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
+        // let old_negative_distance = old_state
+        //     .get_negative_food()
+        //     .map(|food| food.x + food.y)
+        //     .unwrap_or(0.0);
 
-        return (old_positive_distance - new_positive_distance) as f64
-            + (new_negative_distance - old_negative_distance) as f64
-            + 100.0*(new_state.player.score - old_state.player.score) as f64;
+        return
+            // (old_positive_distance - new_positive_distance) as f64
+            // + (new_negative_distance - old_negative_distance) as f64
+            100.0*(new_state.player.score - old_state.player.score) as f64;
     }
     pub fn tick(&mut self, state: AIState) -> AIAction {
         self.step += 1;
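With the distance-shaping terms commented out, the reward reduces to the score delta scaled by 100. A self-contained restatement with a worked example follows; Player and State are stand-in types, and an integer score field is assumed from the as f64 cast.

// Stand-in types for illustration; only the arithmetic mirrors the diff.
struct Player { score: i32 }
struct State { player: Player }

fn reward(old: &State, new: &State) -> f64 {
    // The surviving term of get_reward: 100 reward per point of score.
    100.0 * (new.player.score - old.player.score) as f64
}

fn main() {
    let old = State { player: Player { score: 2 } };
    let new = State { player: Player { score: 3 } };
    assert_eq!(reward(&old, &new), 100.0); // eating one positive food
    // Frames where the score does not move now yield exactly 0 reward,
    // since the distance-based shaping terms are gone.
}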
@@ -29,7 +29,7 @@ fn food_distance<'a>(player: &'a Player) -> impl FnMut(&&Food) -> i32 + 'a {
     move |food: &&Food| {
         let dx = player.x - food.x;
         let dy = player.y - food.y;
-        ((dx + dy) * 100.0) as i32
+        ((dx.abs() + dy.abs()) * 100.0) as i32
     }
 }
 impl AIState {
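The .abs() fix is the substantive part of this hunk: food_distance ranks foods by Manhattan (L1) distance, and with signed components an offset like (-30, +30) sums to zero, so a distant food could rank as if it were on top of the player. A self-contained illustration:

fn main() {
    // Mirrors the closure above with concrete numbers:
    // player at (100, 100), food up-and-to-the-right at (130, 70).
    let (dx, dy) = (100.0_f32 - 130.0, 100.0_f32 - 70.0); // (-30.0, 30.0)

    let buggy = ((dx + dy) * 100.0) as i32;             // 0: the offsets cancel
    let fixed = ((dx.abs() + dy.abs()) * 100.0) as i32; // 6000: true L1 distance x 100

    assert_eq!(buggy, 0);
    assert_eq!(fixed, 6000);
    // Under the old code this food would tie with one sitting on the player,
    // so nearest-food selection (plausibly a min_by_key over this closure)
    // could chase a genuinely distant target.
}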