fix pyr agent and state (#3)
This commit is contained in:
parent
833fcb8978
commit
a977669056
|
@ -1,7 +1,7 @@
|
||||||
exploration_rate = 1224
|
exploration_rate = 1000
|
||||||
update_frequency = 60
|
update_frequency = 60
|
||||||
batch_size = 48
|
batch_size = 48
|
||||||
replay_size = 300
|
replay_size = 300
|
||||||
learning_rate = 0.01
|
learning_rate = 0.01
|
||||||
gamma = 0.97
|
gamma = 0.91
|
||||||
train = true
|
train = false
|
||||||
|
|
7
justfile
7
justfile
|
@ -7,6 +7,9 @@ build-cuda:
|
||||||
train level:
|
train level:
|
||||||
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3 | grep Rewards
|
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3 | grep Rewards
|
||||||
run level:
|
run level:
|
||||||
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_manual_2P.py . --sound off --level {{level}} --game_times 3
|
python -m mlgame -f 400 -i ./ml/ml_play_manual_2P.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
|
||||||
|
battle level:
|
||||||
|
python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
|
||||||
clean:
|
clean:
|
||||||
rm -r model.bin
|
rm -r model.bin.save || true
|
||||||
|
mv model.bin model.bin.save || true
|
||||||
|
|
|
@ -102,7 +102,7 @@ def tick(scene_info: dict):
|
||||||
elif kind=="GARBAGE_3":
|
elif kind=="GARBAGE_3":
|
||||||
foods[i].kind=c_int(6)
|
foods[i].kind=c_int(6)
|
||||||
|
|
||||||
result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(pointer(foods)),c_uint64(n))
|
result=libpyr.tick(app_ptr,(pointer(overall)),(POINTER(Food))(foods),c_uint64(n))
|
||||||
if result==1:
|
if result==1:
|
||||||
return ["UP"]
|
return ["UP"]
|
||||||
if result==2:
|
if result==2:
|
||||||
|
|
|
@ -64,26 +64,27 @@ impl AIAgent {
|
||||||
}
|
}
|
||||||
fn get_reward(&self, new_state: &AIState) -> f64 {
|
fn get_reward(&self, new_state: &AIState) -> f64 {
|
||||||
let old_state = self.old_state.as_ref().unwrap();
|
let old_state = self.old_state.as_ref().unwrap();
|
||||||
let new_positive_distance = new_state
|
// let new_positive_distance = new_state
|
||||||
.get_postivie_food()
|
// .get_postivie_food()
|
||||||
.map(|food| food.x + food.y)
|
// .map(|food| food.x + food.y)
|
||||||
.unwrap_or(0.0);
|
// .unwrap_or(0.0);
|
||||||
let old_positive_distance = old_state
|
// let old_positive_distance = old_state
|
||||||
.get_postivie_food()
|
// .get_postivie_food()
|
||||||
.map(|food| food.x + food.y)
|
// .map(|food| food.x + food.y)
|
||||||
.unwrap_or(0.0);
|
// .unwrap_or(0.0);
|
||||||
let new_negative_distance = new_state
|
// let new_negative_distance = new_state
|
||||||
.get_negative_food()
|
// .get_negative_food()
|
||||||
.map(|food| food.x + food.y)
|
// .map(|food| food.x + food.y)
|
||||||
.unwrap_or(0.0);
|
// .unwrap_or(0.0);
|
||||||
let old_negative_distance = old_state
|
// let old_negative_distance = old_state
|
||||||
.get_negative_food()
|
// .get_negative_food()
|
||||||
.map(|food| food.x + food.y)
|
// .map(|food| food.x + food.y)
|
||||||
.unwrap_or(0.0);
|
// .unwrap_or(0.0);
|
||||||
|
|
||||||
return (old_positive_distance - new_positive_distance) as f64
|
return
|
||||||
+ (new_negative_distance - old_negative_distance) as f64
|
// (old_positive_distance - new_positive_distance) as f64
|
||||||
+ 100.0*(new_state.player.score - old_state.player.score) as f64;
|
// + (new_negative_distance - old_negative_distance) as f64
|
||||||
|
100.0*(new_state.player.score - old_state.player.score) as f64;
|
||||||
}
|
}
|
||||||
pub fn tick(&mut self, state: AIState) -> AIAction {
|
pub fn tick(&mut self, state: AIState) -> AIAction {
|
||||||
self.step += 1;
|
self.step += 1;
|
||||||
|
|
|
@ -29,7 +29,7 @@ fn food_distance<'a>(player: &'a Player) -> impl FnMut(&&Food) -> i32 + 'a {
|
||||||
move |food: &&Food| {
|
move |food: &&Food| {
|
||||||
let dx = player.x - food.x;
|
let dx = player.x - food.x;
|
||||||
let dy = player.y - food.y;
|
let dy = player.y - food.y;
|
||||||
((dx + dy) * 100.0) as i32
|
((dx.abs() + dy.abs()) * 100.0) as i32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl AIState {
|
impl AIState {
|
||||||
|
|
Loading…
Reference in New Issue