transfer to deploy server

2024-04-25 17:08:40 +08:00 · 2024-04-25 17:08:40 +08:00 · 1a777943ff
parent 96d458597e
commit 1a777943ff
17 changed files with 2093 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,7 @@
 # model
 /*.save
 /*.bin
 *.env
 *.swp
 *.pyproj
--- a/config.toml
+++ b/config.toml
@ -0,0 +1,7 @@
 exploration_rate = 1224
 update_frequency = 60
 batch_size = 48
 replay_size = 300
 learning_rate = 0.01
 gamma = 0.97
 train = false
--- a/9
+++ b/9
@ -0,0 +1,9 @@
 # Launch MLGame with the two manual-play scripts on level 8 for three games.
 test:
    python -m mlgame -f 30 -i ./ml/ml_play_manual_1P.py -i ./ml/ml_play_manual_2P.py . --level 8 --game_times 3
 # Build the `pyr` crate in release mode.
 build:
    cd pyr && cargo build --release
 # NOTE(review): `train` declares a `level` parameter but has no body, so it
 # currently runs nothing — confirm whether a command is missing here.
 train level:
 # Launch MLGame with ml_play_pyr_test.py passed twice via `-i`, sound off,
 # on the requested level for three games.
 run level:
    python -m mlgame -f 400 -i ./ml/ml_play_pyr_test.py -i ./ml/ml_play_pyr_test.py . --sound off --level {{level}} --game_times 3
 # Remove the saved model file.
 clean:
    rm -r model.bin
--- a/pyr/Cargo.lock
+++ b/pyr/Cargo.lock
--- a/pyr/Cargo.toml
+++ b/pyr/Cargo.toml
@ -0,0 +1,24 @@
 [package]
 name = "pyr"
 version = "0.1.0"
 edition = "2021"
 [lib]
 name = "pyr"
 crate-type = ["cdylib"]
 [profile.release]
 strip = true
 opt-level = 2
 # lto = true
 [dependencies]
 smol = "2.0.0"
 log = "0.4.21"
 simple_logger = "4.3.3"
 lazy_static = "1.4.0"
 candle-nn = "0.4.1"
 candle-core = "0.4.1"
 rand = "0.8.5"
 toml = "0.8.12"
 serde = {version = "1.0.198", features = ["derive"]}
--- a/pyr/src/app/action.rs
+++ b/pyr/src/app/action.rs
@ -0,0 +1,22 @@
 use crate::Direction;
 /// Discrete action chosen by the agent for a single game tick.
 ///
 /// The type is a plain field-less enum, so it is cheap to copy and can be
 /// printed with `{:?}` when debugging the agent.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum AIAction {
    Up,
    Down,
    Left,
    Right,
    /// No directional input for this tick.
    None,
 }
 impl From<AIAction> for Direction {
    fn from(value: AIAction) -> Self {
        match value {
            AIAction::Up => Direction::Up,
            AIAction::Down => Direction::Down,
            AIAction::Left => Direction::Left,
            AIAction::Right => Direction::Right,
            AIAction::None => Direction::None,
        }
    }
 }
--- a/pyr/src/app/agent.rs
+++ b/pyr/src/app/agent.rs
@ -0,0 +1,202 @@
 use std::collections::VecDeque;
 use std::path::Path;
 use rand::distributions::Uniform;
 use rand::{thread_rng, Rng};
 use candle_core::{DType, Device, Module, Tensor};
 use candle_nn::{linear, seq, Activation, AdamW, Optimizer, Sequential, VarBuilder, VarMap};
 use crate::CONFIG;
 use super::state::OBSERVATION_SPACE;
 use super::{action::AIAction, huber::huber_loss, state::AIState};
 const DEVICE: Device = Device::Cpu;
 const ACTION_SPACE: usize = 5;
 /// Deep Q-learning agent that turns game states into [`AIAction`]s.
 ///
 /// Owns the Q-network, its optimizer and a bounded replay memory of
 /// `(state, action, next_state, reward)` transitions.
 pub struct AIAgent {
    /// Named network variables; loaded from / saved to `model.bin`.
    var_map: VarMap,
    /// Q-network with `OBSERVATION_SPACE` inputs and `ACTION_SPACE` outputs.
    model: Sequential,
    optimizer: AdamW,
    /// Replay memory, newest transition first, capped at `CONFIG.replay_size`.
    memory: VecDeque<(Tensor, u32, Tensor, f64)>,
    /// State observed on the previous tick, if any.
    old_state: Option<AIState>,
    /// Action index returned on the previous tick, i.e. the action that was
    /// in effect between `old_state` and the state seen by the current tick.
    last_action: Option<u32>,
    /// Number of ticks processed so far.
    step: usize,
    /// Sum of all rewards recorded while training.
    accumulate_rewards: f64,
 }
 impl AIAgent {
    /// Denominator of the exploration probability
    /// (`CONFIG.exploration_rate / EXPLORATION_DENOMINATOR`).
    const EXPLORATION_DENOMINATOR: u32 = 4096;
    /// Threshold at which the Huber loss switches from quadratic to linear.
    const HUBER_DELTA: f64 = 1.0;
    /// Builds the network and, when `model.bin` exists, restores its weights.
    ///
    /// # Panics
    /// Panics if a layer cannot be created, if `model.bin` exists but cannot
    /// be loaded into the network, or if the optimizer cannot be built.
    pub async fn new() -> Self {
        let mut var_map = VarMap::new();
        let vb = VarBuilder::from_varmap(&var_map, DType::F32, &DEVICE);
        let model = seq()
            .add(linear(OBSERVATION_SPACE, 60, vb.pp("linear_in")).unwrap())
            .add(Activation::LeakyRelu(0.01))
            .add(linear(60, 48, vb.pp("linear_mid_1")).unwrap())
            .add(Activation::LeakyRelu(0.01))
            .add(linear(48, 48, vb.pp("linear_mid_2")).unwrap())
            .add(Activation::LeakyRelu(0.01))
            .add(linear(48, ACTION_SPACE, vb.pp("linear_out")).unwrap())
            .add(Activation::LeakyRelu(0.01));
        // `VarMap::load` only overwrites variables that are already present in
        // the map, so the checkpoint has to be read *after* the layers above
        // have registered their weights; loading into an empty map is a no-op.
        if Path::new("model.bin").exists() {
            var_map.load("model.bin").unwrap();
        }
        let optimizer = AdamW::new_lr(var_map.all_vars(), CONFIG.learning_rate).unwrap();
        Self {
            var_map,
            model,
            optimizer,
            memory: VecDeque::new(),
            old_state: None,
            last_action: None,
            step: 0,
            accumulate_rewards: 0.0,
        }
    }
    /// Reward for moving from the stored previous state to `new_state`.
    ///
    /// The reward is the decrease in Manhattan distance to the nearest
    /// positive food, plus the increase in Manhattan distance to the nearest
    /// negative food, plus 100 times the score gained. A distance term is
    /// skipped when the corresponding food is missing in either state, so a
    /// food appearing or disappearing does not produce an artificial spike.
    ///
    /// # Panics
    /// Panics if no previous state has been recorded yet.
    fn get_reward(&self, new_state: &AIState) -> f64 {
        let old_state = self
            .old_state
            .as_ref()
            .expect("get_reward requires a previously observed state");
        // Distances are measured from each state's own player position, using
        // the same `food - player` offsets as the feature extraction.
        let positive_distance = |s: &AIState| {
            s.get_postivie_food()
                .map(|food| (food.x - s.player.x).abs() + (food.y - s.player.y).abs())
        };
        let negative_distance = |s: &AIState| {
            s.get_negative_food()
                .map(|food| (food.x - s.player.x).abs() + (food.y - s.player.y).abs())
        };
        let approach = match (positive_distance(old_state), positive_distance(new_state)) {
            (Some(old), Some(new)) => old - new,
            _ => 0.0,
        };
        let avoid = match (negative_distance(old_state), negative_distance(new_state)) {
            (Some(old), Some(new)) => new - old,
            _ => 0.0,
        };
        let score_gain = new_state.player.score - old_state.player.score;
        f64::from(approach) + f64::from(avoid) + 100.0 * f64::from(score_gain)
    }
    /// Processes one game tick and returns the action to perform next.
    ///
    /// While training, the transition that ended in `state` is stored first,
    /// paired with the action returned by the *previous* tick (the one that
    /// was actually in effect during that transition). The next action is then
    /// chosen from the current `state`.
    pub fn tick(&mut self, state: AIState) -> AIAction {
        self.step += 1;
        if CONFIG.train {
            self.record_transition(&state);
        }
        let action = self.select_action(&state);
        self.old_state = Some(state);
        self.last_action = Some(action);
        match action {
            0 => AIAction::None,
            1 => AIAction::Up,
            2 => AIAction::Left,
            3 => AIAction::Right,
            _ => AIAction::Down,
        }
    }
    /// Stores the transition `old_state --last_action--> state` in the replay
    /// memory and runs a training step when one is due.
    fn record_transition(&mut self, state: &AIState) {
        if let (Some(prev_state), Some(prev_action)) = (self.old_state.as_ref(), self.last_action)
        {
            let reward = self.get_reward(state);
            let transition = (
                prev_state.into_tensor().squeeze(0).unwrap(),
                prev_action,
                state.into_tensor().squeeze(0).unwrap(),
                reward,
            );
            self.accumulate_rewards += reward;
            self.memory.push_front(transition);
            self.memory.truncate(CONFIG.replay_size);
        }
        if self.step % CONFIG.update_frequency == 0 && self.memory.len() > CONFIG.batch_size {
            self.train();
        }
    }
    /// Epsilon-greedy action selection for `state`.
    ///
    /// Exploration only happens while training. The numerator is clamped to
    /// the denominator because `gen_ratio` panics when it is larger.
    fn select_action(&self, state: &AIState) -> u32 {
        let explore = CONFIG.train
            && thread_rng().gen_ratio(
                CONFIG.exploration_rate.min(Self::EXPLORATION_DENOMINATOR),
                Self::EXPLORATION_DENOMINATOR,
            );
        if explore {
            return thread_rng().gen_range(0..(ACTION_SPACE as u32));
        }
        self.model
            .forward(&state.into_tensor())
            .unwrap()
            .squeeze(0)
            .unwrap()
            .argmax(0)
            .unwrap()
            .to_scalar::<u32>()
            .unwrap()
    }
    /// Runs one optimisation step on a random batch from the replay memory.
    ///
    /// Must only be called with a non-empty memory (`tick` guarantees this).
    fn train(&mut self) {
        // Sample uniformly, with replacement, from the memory.
        let batch = thread_rng()
            .sample_iter(Uniform::from(0..self.memory.len()))
            .take(CONFIG.batch_size)
            .map(|i| self.memory.get(i).unwrap().clone())
            .collect::<Vec<_>>();
        // Group all the samples together into tensors with the appropriate shape.
        let states: Vec<_> = batch.iter().map(|e| e.0.clone()).collect();
        let states = Tensor::stack(&states, 0).unwrap();
        let actions = batch.iter().map(|e| e.1);
        let actions = Tensor::from_iter(actions, &DEVICE)
            .unwrap()
            .unsqueeze(1)
            .unwrap();
        let next_states: Vec<_> = batch.iter().map(|e| e.2.clone()).collect();
        let next_states = Tensor::stack(&next_states, 0).unwrap();
        let rewards = batch.iter().map(|e| e.3 as f32);
        let rewards = Tensor::from_iter(rewards, &DEVICE)
            .unwrap()
            .unsqueeze(1)
            .unwrap();
        // No terminal flag is recorded, so every sample counts as non-final.
        let non_final_mask = batch.iter().map(|_| true as u8 as f32);
        let non_final_mask = Tensor::from_iter(non_final_mask, &DEVICE)
            .unwrap()
            .unsqueeze(1)
            .unwrap();
        // Q-values predicted for the actions that were taken at each step.
        let estimated_rewards = self.model.forward(&states).unwrap();
        let x = estimated_rewards.gather(&actions, 1).unwrap();
        // Bootstrapped target: reward plus the discounted best Q-value of the
        // next state. The target is detached so no gradient flows through it.
        let expected_rewards = self.model.forward(&next_states).unwrap().detach();
        let y = expected_rewards.max_keepdim(1).unwrap();
        let y = (y * CONFIG.gamma * non_final_mask + rewards).unwrap();
        // Element-wise Huber loss built from tensor operations only. The loss
        // has to stay a tensor connected to `x`: converting it to a float and
        // wrapping it in a new tensor would drop the computation graph and
        // leave the optimizer without any gradient to apply.
        let abs_diff = (&x - &y).unwrap().abs().unwrap();
        let delta = (abs_diff.ones_like().unwrap() * Self::HUBER_DELTA).unwrap();
        let quadratic = abs_diff.minimum(&delta).unwrap();
        let linear = (&abs_diff - &quadratic).unwrap();
        let quadratic_term = (quadratic.sqr().unwrap() * 0.5).unwrap();
        let linear_term = (linear * Self::HUBER_DELTA).unwrap();
        let loss = (quadratic_term + linear_term)
            .unwrap()
            .mean_all()
            .unwrap();
        log::trace!("loss: {:?}", loss.to_scalar::<f32>().unwrap());
        self.optimizer.backward_step(&loss).unwrap();
    }
    /// Clears the replay memory and, when training, saves the weights to
    /// `model.bin`.
    ///
    /// The previous state and action are dropped as well so that the next
    /// tick does not record a transition spanning the checkpoint.
    /// NOTE(review): this assumes `check_point` is called between episodes —
    /// confirm against the Python caller.
    pub fn check_point(&mut self) {
        self.memory.clear();
        self.old_state = None;
        self.last_action = None;
        if CONFIG.train {
            self.var_map.save("model.bin").unwrap();
            log::info!("model.bin saved!");
        }
    }
 }
 // impl Drop for AIAgent {
 //     fn drop(&mut self) {
 //         self.var_map.save("model.bin").unwrap();
 //         log::info!("model.bin saved!");
 //         log::info!("Rewards {}", self.accumulate_rewards as i64);
 //     }
 // }
--- a/pyr/src/app/huber.rs
+++ b/pyr/src/app/huber.rs
@ -0,0 +1,32 @@
 use candle_core::{Tensor, WithDType};
 /// Element types that can provide the constant one half.
 pub trait Half: WithDType + Copy {
    /// The value `0.5` expressed in the implementing type.
    const HALF: Self;
 }
 impl Half for f32 {
    const HALF: Self = 0.5;
 }
 impl Half for f64 {
    const HALF: Self = 0.5;
 }
 /// Builds a Huber-style loss function with the given `threshold`.
 ///
 /// The returned closure sums the absolute element-wise differences of its two
 /// tensors into a single residual `d` and returns
 /// `0.5 * d * d` when `d < threshold`, otherwise
 /// `threshold * (d - 0.5 * threshold)`. Both branches meet at
 /// `d == threshold`, so the loss is continuous for every threshold.
 ///
 /// The result is a plain scalar of type `D`, not a tensor, so it carries no
 /// computation graph and is only suitable for reporting.
 ///
 /// # Panics
 /// The closure panics if the tensors cannot be subtracted or if the summed
 /// residual cannot be read as a scalar of type `D`.
 pub fn huber_loss<D: WithDType + Half>(threshold: D) -> impl Fn(&Tensor, &Tensor) -> D {
    move |x: &Tensor, y: &Tensor| {
        let residual = (x - y)
            .unwrap()
            .abs()
            .unwrap()
            .sum_all()
            .unwrap()
            .to_scalar::<D>()
            .unwrap();
        if residual < threshold {
            // Quadratic region: 0.5 * d^2.
            <D as Half>::HALF * residual * residual
        } else {
            // Linear region: delta * (d - 0.5 * delta).
            threshold * (residual - <D as Half>::HALF * threshold)
        }
    }
 }
--- a/pyr/src/app/mod.rs
+++ b/pyr/src/app/mod.rs
@ -0,0 +1,40 @@
 mod action;
 mod agent;
 mod huber;
 mod state;
 use smol::block_on;
 use crate::data::prelude::*;
 use self::agent::AIAgent;
 /// Snapshot of one game tick, as passed to `App::run`.
 pub struct TickState {
    /// Frame value reported for this tick.
    pub frame: u64,
    /// The controlled player.
    pub player: Player,
    /// The opposing player.
    pub opponent: Opponent,
    /// All foods reported for this tick.
    pub foods: Vec<Food>,
 }
 /// Application-level state; currently holds no data.
 struct AppState {}
 /// Top-level object owning the agent that answers every tick.
 pub struct App {
    state: AppState,
    agent: AIAgent,
 }
 impl App {
    pub fn new() -> Self {
        let agent = block_on(AIAgent::new());
        Self {
            state: AppState {},
            agent,
        }
    }
    pub fn run(&mut self, tick: TickState) -> Direction {
        self.agent.tick(tick.into()).into()
    }
    pub fn check_point(&mut self) {
        self.agent.check_point();
    }
 }
--- a/pyr/src/app/state.rs
+++ b/pyr/src/app/state.rs
@ -0,0 +1,109 @@
 use candle_core::{Device, Tensor};
 use crate::{Food, Opponent, Player};
 use super::TickState;
 /// Length of the feature vector produced by `AIState::into_feature`.
 pub const OBSERVATION_SPACE: usize = 14;
 /// Game state as seen by the agent; built from a `TickState`.
 #[derive(Clone)]
 pub struct AIState {
    /// Frame value reported for this state.
    pub frame: u64,
    /// The controlled player.
    pub player: Player,
    /// The opposing player.
    pub opponent: Opponent,
    /// All foods present in this state.
    pub foods: Vec<Food>,
 }
 impl From<TickState> for AIState {
    fn from(value: TickState) -> Self {
        Self {
            player: value.player,
            opponent: value.opponent,
            foods: value.foods,
            frame: value.frame,
        }
    }
 }
 /// Returns a sort key function giving the Manhattan distance between
 /// `player` and a food, scaled by 100 and truncated to `i32`.
 ///
 /// The key is an integer because `f32` is not `Ord` and therefore cannot be
 /// used with `min_by_key` directly. Absolute offsets are used so that the
 /// smallest key really belongs to the closest food; a signed `dx + dy` would
 /// rank foods by direction instead of by distance.
 fn food_distance<'a>(player: &'a Player) -> impl FnMut(&&Food) -> i32 + 'a {
    move |food: &&Food| {
        let dx = (player.x - food.x).abs();
        let dy = (player.y - food.y).abs();
        ((dx + dy) * 100.0) as i32
    }
 }
 impl AIState {
    /// Returns the food with a positive-signed score that has the smallest
    /// `food_distance` key relative to the player, or `None` if there is no
    /// such food. A score of `+0.0` counts as positive-signed.
    pub fn get_postivie_food(&self) -> Option<&Food> {
        let by_distance = food_distance(&self.player);
        let candidates = self
            .foods
            .iter()
            .filter(|food| food.score.is_sign_positive());
        candidates.min_by_key(by_distance)
    }
    /// Returns the food with a negative-signed score that has the smallest
    /// `food_distance` key relative to the player, or `None` if there is no
    /// such food.
    pub fn get_negative_food(&self) -> Option<&Food> {
        let by_distance = food_distance(&self.player);
        let candidates = self
            .foods
            .iter()
            .filter(|food| food.score.is_sign_negative());
        candidates.min_by_key(by_distance)
    }
    /// Encodes this state as a CPU tensor holding a single row of
    /// `OBSERVATION_SPACE` features.
    ///
    /// # Panics
    /// Panics if the tensor cannot be created.
    pub fn into_tensor(&self) -> Tensor {
        let features = self.into_feature();
        let rows = [features];
        Tensor::new(&rows, &Device::Cpu).unwrap()
    }
    /// Builds the feature vector fed to the network.
    ///
    /// Layout:
    /// - `0..=3`: positive-score foods, accumulated per quadrant
    /// - `4`, `5`: offset from the player to the opponent, divided by 700
    /// - `6..=9`: negative-score foods, accumulated per quadrant
    /// - `10`, `11`: offset from the player to the nearest positive food
    /// - `12`, `13`: offset from the player to the nearest negative food
    ///
    /// Foods are assigned to one of four quadrants around the player by the
    /// signs of `dx + dy` and `dx - dy`, i.e. by the two diagonals through the
    /// player. Each food adds `10 / distance` to its slot, where `distance`
    /// is the Manhattan distance to the player with a floor of 1 so that a
    /// food on top of the player cannot produce an infinite feature.
    fn into_feature(&self) -> [f32; OBSERVATION_SPACE] {
        let x = self.player.x;
        let y = self.player.y;
        let mut features = [0.0_f32; OBSERVATION_SPACE];
        // Parenthesised so the whole offset is scaled, not just the player term.
        features[4] = (self.opponent.x - x) / 700.0;
        features[5] = (self.opponent.y - y) / 700.0;
        for food in &self.foods {
            let dx = food.x - x;
            let dy = food.y - y;
            let quadrant = match (
                (dx + dy).is_sign_positive(),
                (dx - dy).is_sign_positive(),
            ) {
                (true, true) => 0,
                (true, false) => 1,
                (false, false) => 2,
                (false, true) => 3,
            };
            // Positive-score foods use slots 0..=3, negative-score foods 6..=9.
            let base = if food.score.is_sign_positive() { 0 } else { 6 };
            let distance = (dx.abs() + dy.abs()).max(1.0);
            features[base + quadrant] += 10.0 / distance;
        }
        if let Some(food) = self.get_postivie_food() {
            features[10] = food.x - x;
            features[11] = food.y - y;
        }
        if let Some(food) = self.get_negative_food() {
            features[12] = food.x - x;
            features[13] = food.y - y;
        }
        features
    }
 }
--- a/pyr/src/data/config.rs
+++ b/pyr/src/data/config.rs
@ -0,0 +1,35 @@
 use serde::{Deserialize, Serialize};
 lazy_static::lazy_static! {
    /// Global configuration, read from `config.toml` on first access.
    ///
    /// Falls back to `Config::default()` when the file cannot be read and
    /// panics when the file is readable but does not parse as a `Config`.
    pub static ref CONFIG: Config = std::fs::read_to_string("config.toml")
        .map(|content| toml::from_str::<Config>(&content).unwrap())
        .unwrap_or_else(|_| Config::default());
 }
 /// Tunable parameters of the agent, deserialized from `config.toml`.
 #[derive(Serialize, Deserialize)]
 pub struct Config {
    /// Numerator of the exploration probability; the agent draws with
    /// `gen_ratio(exploration_rate, 4096)`.
    pub exploration_rate: u32,
    /// A training step is attempted every `update_frequency` ticks.
    pub update_frequency: usize,
    /// Number of transitions sampled per training step.
    pub batch_size: usize,
    /// Maximum number of transitions kept in the replay memory.
    pub replay_size: usize,
    /// Learning rate passed to the AdamW optimizer.
    pub learning_rate: f64,
    /// Discount factor applied to the next state's best estimated value.
    pub gamma: f64,
    /// Enables exploration, transition recording, training and model saving.
    pub train: bool,
 }
 impl Default for Config {
    fn default() -> Self {
        Self {
            exploration_rate: 1024,
            update_frequency: 150,
            batch_size: 32,
            replay_size: 250,
            learning_rate: 0.04,
            gamma: 0.99,
            train: true,
        }
    }
 }
--- a/pyr/src/data/internal.rs
+++ b/pyr/src/data/internal.rs
@ -0,0 +1,37 @@
 /// The controlled player, with every value converted to `f32`.
 #[derive(Debug, Clone, PartialEq)]
 pub struct Player {
    pub x: f32,
    pub y: f32,
    pub height: f32,
    pub width: f32,
    pub level: f32,
    pub velocity: f32,
    pub score: f32,
 }
 /// The opposing player, with every value converted to `f32`.
 #[derive(Debug, Clone, PartialEq)]
 pub struct Opponent {
    pub x: f32,
    pub y: f32,
    pub level: f32,
 }
 /// A food item, with every value converted to `f32`.
 #[derive(Clone, Debug)]
 pub struct Food {
    pub x: f32,
    pub y: f32,
    pub width: f32,
    pub height: f32,
    /// Score attached to the food; its sign is what the agent uses to tell
    /// positive foods from negative ones.
    pub score: f32,
 }
 /// Placeholder food: a 1x1 item with zero score positioned at
 /// (1000000, 1000000).
 impl Default for Food {
    fn default() -> Self {
        const FAR_AWAY: f32 = 1000000.0;
        const UNIT: f32 = 1.0;
        Self {
            x: FAR_AWAY,
            y: FAR_AWAY,
            width: UNIT,
            height: UNIT,
            score: 0.0,
        }
    }
 }
--- a/pyr/src/data/mod.rs
+++ b/pyr/src/data/mod.rs
@ -0,0 +1,13 @@
 mod config;
 mod internal;
 mod raw;
 /// Re-exports for the FFI boundary: the global `CONFIG` and every raw type.
 pub mod parser {
    pub use super::config::CONFIG;
    pub use super::raw::*;
 }
 /// Re-exports for the rest of the crate: the internal game types and
 /// `Direction`.
 pub mod prelude {
    pub use super::internal::*;
    pub use super::raw::Direction;
 }
--- a/pyr/src/data/raw.rs
+++ b/pyr/src/data/raw.rs
@ -0,0 +1,82 @@
 use super::internal::*;
 /// Per-tick scalar game data with C layout, received by reference in the
 /// exported `tick` function.
 ///
 /// The field order and types define the binary layout, so they must match
 /// the structure declared by the caller.
 #[repr(C)]
 #[derive(Debug)]
 pub struct RawOverall {
    /// Copied into `TickState::frame`.
    pub frame: u64,
    /// Becomes `Player::score`.
    score: i64,
    // Not read anywhere in this file.
    score_to_pass: i64,
    // `get_player` subtracts 350 from both coordinates.
    self_x: i64,
    self_y: i64,
    self_h: i64,
    self_w: i64,
    self_vel: i64,
    self_lv: i64,
    // `get_opponent` subtracts 350 from both coordinates.
    opponent_x: i64,
    opponent_y: i64,
    opponent_lv: i64,
 }
 impl RawOverall {
    /// Converts the player fields to a `Player`, shifting both coordinates
    /// by -350 before the cast to `f32`.
    pub fn get_player(&self) -> Player {
        let x = (self.self_x - 350) as f32;
        let y = (self.self_y - 350) as f32;
        let height = self.self_h as f32;
        let width = self.self_w as f32;
        let level = self.self_lv as f32;
        let velocity = self.self_vel as f32;
        let score = self.score as f32;
        Player {
            x,
            y,
            height,
            width,
            level,
            velocity,
            score,
        }
    }
    /// Converts the opponent fields to an `Opponent`, shifting both
    /// coordinates by -350 before the cast to `f32`.
    pub fn get_opponent(&self) -> Opponent {
        let x = (self.opponent_x - 350) as f32;
        let y = (self.opponent_y - 350) as f32;
        let level = self.opponent_lv as f32;
        Opponent { x, y, level }
    }
 }
 /// One food entry with C layout, received as an array in the exported
 /// `tick` function.
 ///
 /// The field order and types define the binary layout, so they must match
 /// the structure declared by the caller.
 #[repr(C)]
 #[derive(Debug, Clone)]
 pub struct RawFood {
    pub h: i64,
    pub w: i64,
    pub x: i64,
    pub y: i64,
    pub score: i64,
    /// Not copied into `Food`.
    /// NOTE(review): presumably one of the `FoodKind` discriminants — confirm.
    pub kind: i32,
 }
 impl From<RawFood> for Food {
    fn from(value: RawFood) -> Self {
        Food {
            x: value.x as f32,
            y: value.y as f32,
            width: value.w as f32,
            height: value.h as f32,
            score: value.score as f32,
        }
    }
 }
 /// Kinds of food, with explicit `i32` discriminants from 1 to 6.
 ///
 /// Not referenced by any other code visible in this file.
 #[repr(i32)]
 #[derive(Debug)]
 pub enum FoodKind {
    Food1 = 1,
    Food2 = 2,
    Food3 = 3,
    Garbage1 = 4,
    Garbage2 = 5,
    Garbage3 = 6,
 }
 /// Movement command returned to the caller of the exported `tick` function
 /// as its `i32` discriminant.
 #[repr(i32)]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Direction {
    Up = 1,
    Down = 2,
    Left = 3,
    Right = 4,
    /// No movement.
    None = 5,
 }
--- a/pyr/src/lib.rs
+++ b/pyr/src/lib.rs
@ -0,0 +1,53 @@
 mod app;
 mod data;
 use std::slice;
 use app::{App, TickState};
 use data::parser::*;
 use data::prelude::*;
 use simple_logger::SimpleLogger;
 #[no_mangle]
 pub unsafe extern "C" fn tick(
    app: *mut App,
    overall: &RawOverall,
    food: *mut RawFood,
    len: u64,
 ) -> i32 {
    let app = &mut *app;
    let state = {
        let foods: Vec<Food> = slice::from_raw_parts(food, len as usize)
            .into_iter()
            .map(|x| x.to_owned().into())
            .collect();
        TickState {
            frame: overall.frame,
            player: overall.get_player(),
            opponent: overall.get_opponent(),
            foods,
        }
    };
    app.run(state) as i32
 }
 /// FFI entry point: asks the app to run its checkpoint. Does nothing when
 /// `app` is null.
 ///
 /// # Safety
 /// `app` must be null or a pointer obtained from `new_app` that is not used
 /// concurrently.
 #[no_mangle]
 pub unsafe extern "C" fn check_point(app: *mut App) {
    if let Some(app) = app.as_mut() {
        app.check_point();
    }
 }
 /// FFI entry point: installs the logger, creates an `App` on the heap and
 /// returns a raw pointer to it.
 ///
 /// The caller owns the returned pointer and passes it to the other exported
 /// functions.
 ///
 /// # Safety
 /// Declared `unsafe` for consistency with the other exported functions; it
 /// has no preconditions of its own.
 #[no_mangle]
 pub unsafe extern "C" fn new_app() -> *const App {
    // Installing a global logger fails when one is already set, which happens
    // as soon as `new_app` is called a second time in the same process. That
    // is not an error for the caller, so the failure is deliberately ignored.
    let _ = SimpleLogger::new().init();
    log::info!("Initializing App...");
    Box::into_raw(Box::new(App::new()))
 }
 /// FFI entry point intended to release an `App` created by `new_app`.
 ///
 /// NOTE(review): the body is currently empty because the deallocation is
 /// commented out, so the `App` allocated in `new_app` is never freed —
 /// confirm whether disabling it was intentional.
 #[no_mangle]
 pub unsafe extern "C" fn drop_app(app: *mut App) {
    // drop(Box::from_raw(app))
 }
--- a/pyr/src/main.rs
+++ b/pyr/src/main.rs
@ -0,0 +1,25 @@
 // use candle_core::{DType, Device};
 // use candle_nn::{linear, loss::mse, seq, Activation, AdamW, VarBuilder, VarMap};
 /// Binary entry point. It currently does nothing: the body contains only a
 /// commented-out experiment that builds the same network layout as the agent.
 fn main() {
    // let mut var_map = VarMap::new();
    // var_map.load("model.bin").unwrap();
    // let vb = VarBuilder::from_varmap(&var_map, DType::F32, &Device::Cpu);
    // let model = seq()
    //     .add(linear(14, 60, vb.pp("linear_in")).unwrap())
    //     .add(Activation::LeakyRelu(0.01))
    //     .add(linear(60, 48, vb.pp("linear_mid_1")).unwrap())
    //     .add(Activation::LeakyRelu(0.01))
    //     .add(linear(48, 48, vb.pp("linear_mid_2")).unwrap())
    //     .add(Activation::LeakyRelu(0.01))
    //     .add(linear(48, 5, vb.pp("linear_out")).unwrap())
    //     .add(Activation::LeakyRelu(0.01));
    // let optimizer = AdamW::new_lr(var_map.all_vars(), 0.5).unwrap();
    // let target = Tensor::new(&[0.0], &Device::Cpu).unwrap();
    // self.optimizer
    //         .backward_step(&Tensor::new(&[loss], &DEVICE).unwrap())
    //         .unwrap();
 }
--- a/pyr/test.py
+++ b/pyr/test.py
@ -0,0 +1,20 @@
 #!/usr/bin/env python3
 # Scratch script: loads the release build of the pyr shared library through
 # ctypes and passes a pointer to a two-field struct to a native function.
 from ctypes import *
 mylib = cdll.LoadLibrary("./target/release/libpyr.so")  
 # Two unsigned 64-bit fields, mirrored as a C struct.
 class Point(Structure):
    _fields_ = [("x", c_uint64), ("y", c_uint64)]
 point=Point()
 point.x=1
 point.y=2
 ptr=pointer(point)
 print(ptr)
 # NOTE(review): the functions exported by pyr/src/lib.rs in this commit are
 # tick, check_point, new_app and drop_app; `set_point` is not among them, so
 # this call looks stale — confirm before relying on this script.
 mylib.set_point(ptr)
 # Print both fields again after the native call.
 print(point.x)
 print(point.y)