[Toss-devel-svn] SF.net SVN: toss:[1407] trunk/Toss
Status: Beta
Brought to you by:
lukaszkaiser
From: <luk...@us...> - 2011-04-13 14:42:46
|
Revision: 1407 http://toss.svn.sourceforge.net/toss/?rev=1407&view=rev Author: lukaszkaiser Date: 2011-04-13 14:42:37 +0000 (Wed, 13 Apr 2011) Log Message: ----------- Removing Game ml. Modified Paths: -------------- trunk/Toss/Makefile trunk/Toss/Play/Makefile trunk/Toss/Play/Play.ml trunk/Toss/Play/PlayTest.ml trunk/Toss/Server/Server.ml trunk/Toss/Server/ServerTest.ml trunk/Toss/TossFullTest.ml trunk/Toss/TossTest.ml Removed Paths: ------------- trunk/Toss/Play/Game.ml trunk/Toss/Play/GameTest.ml Modified: trunk/Toss/Makefile =================================================================== --- trunk/Toss/Makefile 2011-04-05 20:44:16 UTC (rev 1406) +++ trunk/Toss/Makefile 2011-04-13 14:42:37 UTC (rev 1407) @@ -129,7 +129,7 @@ Play/HeuristicTest \ Play/MoveTest \ Play/GameTreeTest \ - Play/GameTest + Play/PlayTest # GGP tests GGP_tests: \ Deleted: trunk/Toss/Play/Game.ml =================================================================== --- trunk/Toss/Play/Game.ml 2011-04-05 20:44:16 UTC (rev 1406) +++ trunk/Toss/Play/Game.ml 2011-04-13 14:42:37 UTC (rev 1407) @@ -1,1245 +0,0 @@ -(* Game-related definitions. The UCTS algorithm. *) - -open Printf - -(* Default effort overshoots to let timeout handle stopping. *) -let default_effort = ref 10 - -let debug_level = ref 0 -let set_debug_level i = (debug_level := i) - -let deterministic_suggest = ref false - -(* A global "hurry up!" switch triggered by the timer alarm. *) -let timeout = ref false -let get_timeout () = !timeout -let cancel_timeout () = - let remaining = Unix.alarm 0 in - (* {{{ log entry *) - if !debug_level > 0 then ( - if !timeout then - Printf.printf "Computation finished by timeout.\n%!" - else - Printf.printf "Computation finished with %d seconds left.\n%!" - remaining - ); - (* }}} *) - timeout := false - -let trigger_timeout _ = - (* if !debug_level > 0 then printf " TIMEOUT %!"; *) - (* TODO: no output possible from inside handler *) - timeout := true - -let () = - Sys.set_signal Sys.sigalrm - (Sys.Signal_handle (fun _ -> timeout := true)) - -type f_table = float array - -(* Cumulative score of players for computing value estimate. *) -type score = { - score_table : f_table; (* sum of payoffs *) - variation_table : f_table; (* sum of squares of payoffs *) - (* sum of the squares of payoffs *) - score_obs : int (* number of observations *) -} - -let add_score {score_table=table1; variation_table=vartab1; score_obs=obs1} - {score_table=table2; variation_table=vartab2; score_obs=obs2} = - {score_table = Aux.array_map2 (fun sc1 sc2 -> - sc1+.sc2) table1 table2; - variation_table = Aux.array_map2 (fun sc1 sc2 -> - sc1+.sc2) vartab1 vartab2; - score_obs = abs obs1 + abs obs2} - -let score_payoff payoff = { - score_table = payoff; - variation_table = Array.map (fun sc-> sc*.sc) payoff; - score_obs = 1; -} - -let discount n payoffs = - Array.map (fun payoff -> - (0.5 +. 1./.((float_of_int n) +. 2.)) *. payoff) payoffs - - -type uctree_node = { - node_state : Arena.game_state ; - node_stats : score ; (* playout statistic *) - node_heuristic : f_table ; (* heuristic table *) - node_bestheur : int ; (* the subtree from which - [node_heuristic] is picked *) - node_endstate : Structure.structure ; (* final game state of a - playout that originated - the node (mostly for debugging) *) - node_subtrees : uctree array ; -} - -(* The game tree for turn-based multiplayer games UCT search. Assumes - determinism of move generation (the same state and location should - result in the same array of moves). *) -and uctree = - | Node of uctree_node - | Leaf of Arena.game_state * score * f_table * Structure.structure - (* once played leaf: state, time, location, score, heuristic, game-end *) - | Tip of Arena.game_state * f_table - (* unplayed leaf, with heuristic value (evaluation game - result) *) - | Terminal of Arena.game_state * score * f_table * f_table - (* the score, the cache of the actual payoff table and the - heuristic *) - | TEmpty (* to be expanded in any context *) - -(* History stored for a play, including caching of computations for - further use. *) -type memory = - | No_memory - | State_history of Structure.structure list - (* states visited in reverse order *) - | UCTree of uctree - -(* Effect that heuristics have on the MCTS algorithm. *) -type mcts_heur_effect = - | Heuristic_local of float (* TODO: not implemented *) - (* each tree node only considers the heuristic of its state, - the parameter is the influence of the heuristic on the tree - traversal, there is no influence on the actual choice *) - | Heuristic_mixed of float * float - (* a node stores a heuristic maximaxed from the leaf states of - the subtree, [MaximaxMixed (trav, select)] has [trav] - the influence on the tree traversal, [select] the influence - on the actual choice *) - | Heuristic_select of float - (* a node stores a heuristic maximaxed from the leaf states of - the subtree, the parameter is the influence on the tree - traversal, the actual choice is based on the heuristic alone - and not the Monte-Carlo payoff estimates *) - | Heuristic_only - (* a node stores a heuristic maximaxed from the leaf states of - the subtree, which completely replaces the role of the - Monte-Carlo payoff estimates from the standard UCT algorithm *) - - -(* Parameters of the Upper Confidence Bounds-based Monte Carlo Tree - Search. Cooperative (competitive) mode means that of actions with - equal value for a given player, the one with highest (lowest) sum - of values is chosen. *) -type uct_params = { - cUCB : float ; (* coefficient of the confidence bound component *) - constK : float ; (* smoothening *) - iters : int ; (* tree updates per move *) - horizon : int option ; (* limit on the playout length *) - heur_effect : mcts_heur_effect ; (* maximaxed vs local heuristic *) - cooperative : bool ; (* cooperative vs competitive *) - cLCB : float option ; (* cautious action picking; if present, use - lower confidence bound with given - coefficient for action selection *) -} - -(* An evaluation game is a set of games specific to locations, each - game is used to assess the value of its location. It contains the - same data as {!play} plus {!play_state} (for initial state) below, - only without the [struc] and [time] fields, and with some general - playout parameters. [ev_agents] array can be empty but only if - every location of the [ev_game] subgame has empty moves list. *) -type evgame_loc = { - ev_game : Arena.game; - ev_agents : agent array; (* player-indexed or empty *) - ev_delta : float; - ev_location : int; - ev_memory : memory array; (* player-indexed *) - ev_horizon : int option; -} -and evaluation_game = evgame_loc array - - -(* How does a player pick moves. *) -and agent = - | Random_move - (* select a random move; avoids rewriting all matches and calling - evaluation games *) - | Maximax_evgame of evaluation_game * bool * int * bool - (* select a move according to evaluation games played in each - leaf state; in a cooperative/competitive way (see - {!uct_params}); expand the full game subtree to the given - depth and propagate evaluation game results from leaves by - taking cooperative/competitive best move for location's - player; optional alpha-beta-like pruning, with move - reordering based on afterstate heuristic value *) - | Tree_search of evaluation_game * int option * uct_params * agent array - (* Monte-Carlo tree search; uses the evaluation game to compute - heuristic values for use within the tree, and the agents for - playout plays *) - | External of (string array -> int) - (* take an array of string representations of resulting structures - and return the position of the desired state; for interacting - with external players only *) - -(* The evolving state of a play. *) -type play_state = { - game_state : Arena.game_state ; - memory : memory array ; (* player-specific history *) -} - -(* Data defining a play (without the initial play state). TODO: - remove dependency on [delta] (move it to Arena.game). *) -type play = { - game : Arena.game ; (* the game played *) - agents : agent array ; (* player-indexed *) - delta : float ; (* expected width of payoffs *) -} - - -let default_params = { - cUCB = 1.0 ; - cLCB = Some 1.0 ; - constK = 1.0 ; - iters = 200 ; - horizon = None ; - heur_effect = Heuristic_mixed (0.5, 0.2) ; - cooperative = false ; -} - - -let default_heuristic = Heuristic.default_heuristic - -(* The UCB1-TUNED estimate, modified to extend to the zero- and - one-observation cases. *) -let ucb1_tuned ?(lower_bound=false) - params delta player parent_sc ~heuristic score = - if parent_sc.score_obs = 0 then failwith - "ucb1_tuned: parent has no observations"; - let cHEUR = match params.heur_effect with - | Heuristic_local c when not lower_bound -> c - | Heuristic_mixed (c, _) when not lower_bound -> c - | Heuristic_mixed (_, c) when lower_bound -> c - | Heuristic_select c -> c - | Heuristic_only -> 1.0 - | _ -> 0.0 in - let i2f = float_of_int in - let tot = i2f parent_sc.score_obs in - let vari = score.variation_table.(player) in - let obs = i2f score.score_obs in - let score = score.score_table.(player) in - let var_est = if obs < 2. then 0. else - (vari -. score *. score /. obs) /. (obs -. 1.) in - let var_ucb = if obs < 2. then 0. else - var_est +. delta *. - sqrt (2. *. log (tot +. params.constK) /. (obs +. params.constK)) in - let var_coef = if obs < 2. then 0.25 else - min 0.25 (var_ucb /. delta) in - let cb = - (if lower_bound then - match params.cLCB with Some lcb -> ~-. lcb | None -> 0. - else params.cUCB) *. delta *. sqrt - ((log (tot +. params.constK) /. (obs +. params.constK)) *. var_coef) in - let mean = if obs < 1. then 0. else score /. obs in - if params.heur_effect = Heuristic_only then - heuristic +. cb - else - mean +. cb +. cHEUR *. heuristic *. cb - -(* The move valuation used when actually making a move using the MCTS-UCT - algorithm, computed for all players. *) -let node_values params delta parent_sc heuristics - ({score_table=table;score_obs=obs} as score) = - let i2f = float_of_int in - if params.cLCB <> None && parent_sc.score_obs > 0 then - Array.mapi (fun player heuristic -> - ucb1_tuned ~lower_bound:true - params delta player parent_sc ~heuristic score) - heuristics - else - match params.heur_effect with - | Heuristic_mixed (_, cHEUR) -> - if obs = 0 then - Array.map (fun h -> cHEUR*.h) heuristics - else - Array.mapi (fun player score -> - score /. i2f (abs obs) +. - cHEUR *. heuristics.(player)) table - | Heuristic_local _ -> - if obs = 0 then Array.map (fun _ -> 0.) table - else - Array.map (fun score -> score /. i2f (abs obs)) table - | Heuristic_select _ | Heuristic_only -> heuristics - -let uctree_heuristic = function - | Node node -> node.node_heuristic - | Leaf (_,_,h,_) -> h - | Tip (_,h) -> h - | Terminal (_,_,h,_) -> h - | TEmpty -> failwith "uctree_heuristic: empty tree" - -let uctree_size = function - | Node node -> node.node_stats.score_obs - | Leaf (_,_,h,_) -> 1 - | Tip (_,h) -> 1 - | Terminal (_,_,h,_) -> 1 - | TEmpty -> 0 - -let uctree_location = function - | Node node -> node.node_state.Arena.cur_loc - | Leaf (s,_,_,_) -> s.Arena.cur_loc - | Tip (s,_) -> s.Arena.cur_loc - | Terminal (s,_,_,_) -> s.Arena.cur_loc - | _ -> failwith "uctree_location: empty tree" - -let uctree_model = function - | Node node -> node.node_state.Arena.struc - | Leaf (m,_,_,_) -> m.Arena.struc - | Tip (m,_) -> m.Arena.struc - | Terminal (m,_,_,_) -> m.Arena.struc - | _ -> failwith "uctree_model: empty tree" - -let uctree_state = function - | Node node -> node.node_state - | Leaf (m,_,_,_) -> m - | Tip (m,_) -> m - | Terminal (m,_,_,_) -> m - | _ -> failwith "uctree_state: empty tree" - -(* An unevaluated tree or subtree. *) -let uctree_empty = function - | TEmpty | Tip _ -> true - | _ -> false - -let uctree_score ?num_players = function - | Node node -> node.node_stats - | Leaf (_,s,_,_) -> s - | Terminal (_,s,_,_) -> s - | _ -> - match num_players with - | None -> failwith "uctree_score: no score in tree, no num_players" - | Some n -> - {score_table= Array.make n 0.0; - variation_table=Array.make n 0.0; score_obs=0} - -(* The result of the game played when the node was first grown. *) -let uctree_endgame = function - | Node node -> node.node_endstate - | Leaf (_,_,_,r) -> r - | Tip _ -> failwith "uctree_endgame: Tip" - | Terminal (r,_,_,_) -> r.Arena.struc - | TEmpty -> failwith "uctree_endgame: TEmpty" - - -let print_score (!) params delta heuristics score = - !"Values: "; - let values = node_values params delta score heuristics score in - Array.iteri (fun player score_v -> !(string_of_int player); - !" nobs="; !(string_of_int score.score_obs); - !" score="; - !(string_of_float score_v); - !" value="; - !(sprintf "%2.2f" values.(player)); - !" UCB="; - !(sprintf "%2.2f" - (ucb1_tuned params delta player score - ~heuristic:(heuristics.(player)) score)); - !"; ") score.score_table - -(* Print the whole tree. Debugging. *) -let print_uctree (!) params delta tree = - let kind = function - | TEmpty -> "Empty" - | Tip _ -> "Tip" | Leaf _ -> "Leaf" | Terminal _ -> "Terminal" - | Node _ -> "Node" in - let rec pr prefix pos = function - | Tip _ when debug_level.contents <= 4 -> () - | node -> - let pref_str s = - Str.global_replace (Str.regexp "\n") ("\n"^prefix) s in - !prefix; - let score = uctree_score ~num_players:0 node in - let heuristic = uctree_heuristic node in - !(sprintf "pos %d " pos); - print_score (!) params delta heuristic score; - !"heuristics"; - Array.iteri (fun player score-> - !(sprintf " %d:%f" player score)) heuristic; - !"; scores"; - Array.iteri (fun player score-> - !(sprintf " %d:%.2f" player score)) score.score_table; - !"; "; - !(kind node); !":\n"; - !prefix; - !(pref_str (Structure.str (uctree_model node))); - match node with - | TEmpty - | Terminal _ | Tip _ -> !"/\n" - | Leaf (_,_,_,result) -> - !"game ended in:\n"; - !prefix; !(pref_str (Structure.str result)); - !"/\n" - | Node node -> - !"game ended in:\n"; - !prefix; !(pref_str (Structure.str node.node_endstate)); - !"+\n"; - Array.iteri (fun pos subt -> - (* !(prefix^"| "); !(print_action act); !"\n"; *) - pr (prefix^"|") pos subt) node.node_subtrees in - if tree = TEmpty then !"Empty\n" - else pr " " 0 tree - -let str_payoff payoff = - String.concat ", " - (Array.to_list (Array.mapi (fun p v -> sprintf"%d:%f" p v) payoff)) - - -let initial_state ?(loc=0) {game=game; agents=agents} model = - (* {{{ log entry *) - if !debug_level > 5 then ( - Printf.printf "initial_state: agents #=%d, loc #=%d\n%!" - (Array.length agents) (Array.length game.Arena.graph); - ); - (* }}} *) - let player_memory = Array.map - (function Tree_search _ -> UCTree TEmpty | _ -> No_memory) agents in - { - game_state = {Arena.cur_loc = loc; time = 0.0; struc = model}; - memory = player_memory; - } - -(* TODO: [num_players] not used (remove if not needed). *) -let update_memory_single num_players state pos = function - | No_memory -> No_memory - | State_history history -> State_history (state.Arena.struc::history) - | UCTree (Node node) -> - UCTree node.node_subtrees.(pos) - | UCTree _ -> UCTree TEmpty - -let update_memory ~num_players state pos memory = - Array.map - (update_memory_single num_players state pos) memory - - -(* Average tables of numbers. *) -let average_table tables = - match tables with - | [] -> failwith "average_table: empty list" - | [table] -> table - | hd::tl -> - let n = float_of_int (List.length tables) in - let sum = List.fold_left (fun sum table -> - Aux.array_map2 (fun v1 v2 -> - v1+.v2) sum table) hd tl in - Array.map (fun v -> v /. n) sum - -(* Find all maximal elements. *) -let find_all_max cmp l = - let rec find best acc = function - | hd::tl -> - let rel = cmp hd best in - if rel < 0 then find best acc tl - else if rel = 0 then find best (hd::acc) tl - else find hd [hd] tl - | [] -> List.rev acc in - match l with - | [] -> invalid_arg "find_all_max: empty list" - | hd::tl -> find hd [hd] tl - -(* Maximaxing: find the best among subtrees for a player. Pick a best - entry in the lexicographic product of: maximal [scores] value for - [player], minimal/maximal sum of [scores] values (resp. competitive - [cooperative=false] / [cooperative=true] mode), integer accuracy - measure [subt_sizes]. Return a best position (randomized if - multiple are optimal) and a best scores table (averaged if multiple - are optimal). *) -let find_best_score ?(use_det_setting=false) cooperative player - scores subt_sizes = - (* find a new best score *) - let my_scores = Array.map (fun s->s.(player)) scores in - let bestsc = Aux.array_argfind_all_max compare my_scores in - match bestsc with - | [] -> - failwith "find_best_score: empty arg max" - | [bestsc] -> scores.(bestsc), bestsc - | _ -> - (* pick cooperative/competitive ones *) - let sc_sums = List.map - (fun bh -> bh, - Array.fold_left (+.) 0. scores.(bh)) - bestsc in - let cmp_sums : (int * float) -> (int * float) -> int = - if cooperative - then fun (_,x) (_,y) -> compare x y - else fun (_,x) (_,y) -> compare y x in - let bestsc = - find_all_max cmp_sums sc_sums in - match bestsc with - | [] -> failwith "impossible" - | [bestsc,_] -> scores.(bestsc), bestsc - | (bsc,_)::(bsc2,_)::_ - when use_det_setting && !deterministic_suggest -> - scores.(bsc), bsc - | _ -> - (* pick ones from biggest subtrees *) - let bestsc = - find_all_max - (fun (b1,_) (b2,_) -> subt_sizes.(b1) - subt_sizes.(b2)) - bestsc in - match bestsc with - | [] -> failwith "impossible" - | [bestsc,_] -> scores.(bestsc), bestsc - | _::_ -> - (* check the number of players - TODO: perhaps not worth averaging *) - let randbest, _ = - List.nth bestsc (Random.int (List.length bestsc)) in - let bestsc_table = scores.(randbest) in - if Array.length bestsc_table > 2 then - average_table - (List.map (fun (b,_) -> scores.(b)) - bestsc), - randbest - else bestsc_table, randbest - -let debug_count = ref 0 - -(* Generate evaluation game score (the whole payoff table). *) -let rec play_evgame grid_size model time evgame = - let subloc = evgame.ev_game.Arena.graph.(evgame.ev_location) in - if subloc.Arena.moves = [] then (* optimization *) - Array.map (fun expr -> - Solver.M.get_real_val expr model) subloc.Arena.payoffs - else - let state = - {game_state={Arena.cur_loc=evgame.ev_location; struc=model; time=time}; - memory=evgame.ev_memory} in - let subplay = - {game=evgame.ev_game; agents=evgame.ev_agents; delta=evgame.ev_delta} in - (* ignoring the endgame model *) - let _, payoff = - play ~grid_size ?horizon:evgame.ev_horizon subplay state in - payoff - -(* Generate evgame scores for possible moves. *) -and gen_scores grid_size subgames moves models loc = - Array.mapi (fun pos mv -> - let {Arena.struc=model; time=time} = models.(pos) in - play_evgame grid_size model time subgames.(mv.Move.next_loc) - ) moves - - - -(* Make a move in a play, or compute the payoff table when the game - ended. Return the move chosen and the moves considered. One can use - only a {!move} to suggest a move, or only the updated {!play_state} - to follow the best move (or both). Also return the accrued - computation as updated "memory" for the current state. - - Uses [Random_move] for other agents if their "effort" is set to - zero. Do not use [Random_move] or [effort=0] when the table of - moves is used for more than extracting the move selected! *) -and toss ~grid_size ?(just_payoffs=false) - ({game={Arena.rules=rules; graph=graph; num_players=num_players; - defined_rels=defined_rels}; - agents=agents; delta=delta} as play_def) - {game_state=state; memory=memory} = - let loc = graph.(state.Arena.cur_loc) in - let moves = - if just_payoffs then [| |] - else Move.gen_moves grid_size rules state.Arena.struc loc in - (* Don't forget to check after generating models as well -- - postconditions! *) - if moves = [| |] then - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - loc.Arena.payoffs in - Aux.Right payoff - else - let agent = agents.(loc.Arena.player) in - match agent with - - | Random_move - | Maximax_evgame (_, _, 0, _) - | Tree_search (_, _, {iters=0}, _) -> - let mlen = Array.length moves in - let init_pos = Random.int mlen in - let pos = ref init_pos in - let nstate = ref None in - while !nstate = None && (!pos <> init_pos || !pos < mlen) do - let mv = moves.(!pos mod mlen) in - let rule = List.assoc mv.Move.rule rules in - nstate := - Aux.map_option - (fun (model, time, _) -> - (* ignoring shifts, i.e. animation steps *) - {Arena.cur_loc=mv.Move.next_loc; struc=model; time=time}) - (ContinuousRule.rewrite_single state.Arena.struc state.Arena.time - mv.Move.embedding rule mv.Move.mv_time mv.Move.parameters); - incr pos - done; - (match !nstate with - | None -> - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - loc.Arena.payoffs in - Aux.Right payoff - | Some state -> - (* [pos] refers to unfiltered array! use only to extract - | the move from the returned array *) - Aux.Left - (!pos mod mlen, moves, memory, - {game_state = state; - memory = update_memory ~num_players state !pos memory})) - - | Maximax_evgame (subgames, cooperative, depth, use_pruning) -> - (* {{{ log entry *) - - let nodes_count = ref 0 in - let size_count = ref 1 in - let depth0 = depth in - let debug_playclock = ref 0. in - if !debug_level > 1 && depth > 1 || !debug_level > 3 - then ( - printf "toss: %s ev game, timer started...\n%!" - (if use_pruning then "alpha_beta_ord" else "maximax"); - debug_playclock := Sys.time ()); - - (* }}} *) - (* full tree search of limited depth by plain recursive - calls, with optional alpha-beta pruning *) - (* [betas] are used imperatively *) - let rec maximax_tree pre_heur prev_player betas depth - {Arena.cur_loc = loc; struc=model; time=time} = - (* {{{ log entry *) - incr nodes_count; - size_count := !size_count + Array.length moves; - if (depth0 > 2 || !debug_level > 4) - && depth > 1 && !debug_level > 0 - then printf "%d,%!" !nodes_count; - if !debug_level > 2 && (depth0 > 2 || !debug_level > 4) - && (depth > 1 || !debug_level > 3) - then printf "%s%!" - (Str.global_replace (Str.regexp "\n") - ("\n"^String.make (max 0 (depth0-depth)) '|') - ("\n" ^ Structure.str model)); - (* }}} *) - if !timeout then (* will be handled by i.deep. *) - Array.map (fun _ -> 0.) graph.(loc).Arena.payoffs - else if depth < 1 then ( (* leaf position *) - let res = - match pre_heur with - | Some h -> h - | None -> - play_evgame grid_size model time subgames.(loc) in - (* {{{ log entry *) - if !debug_level > 4 then ( - let player = graph.(loc).Arena.player in - printf ", leaf %d heur: %F %!" player res.(player) - ); - (* }}} *) - res - ) else - let location = graph.(loc) in - let moves = - Move.gen_moves grid_size rules model location in - if moves = [| |] then (* terminal position *) - let res = - (* *) - Array.map (fun expr -> - 100000. *. - Solver.M.get_real_val expr model) - location.Arena.payoffs (* see [let payoff] above *) - (* * - play_evgame grid_size model time subgames.(loc) - * *) - in - (* {{{ log entry *) - if !debug_level > 4 then ( - let player = graph.(loc).Arena.player in - printf ", terminal %d heur: %F %!" player res.(player) - ); - (* }}} *) - res - else if !timeout then - Array.map (fun _ -> 0.) graph.(loc).Arena.payoffs - else - let moves, models = Move.gen_models rules model time moves in - let n = Array.length models in - if !timeout then - Array.map (fun _ -> 0.) graph.(loc).Arena.payoffs - else if n = 0 then begin (* terminal after postconditions *) - let res = - (* play_evgame grid_size model time subgames.(loc) *) - (* *) - Array.map (fun expr -> - 100000. *. - Solver.M.get_real_val expr model) - location.Arena.payoffs - (* * - play_evgame grid_size model time subgames.(loc) - * *) - in - (* {{{ log entry *) - if !debug_level > 4 then ( - let player = graph.(loc).Arena.player in - printf ", terminal %d heur: %F %!" player res.(player) - ); - (* }}} *) - res - end else - let player = location.Arena.player in - let now_pruning = use_pruning && prev_player <> player in - let new_betas = Array.make num_players infinity in - let index = - Array.init (Array.length models) (fun i->i) in - let heuristics = - if depth > 1 then begin - let heuristics = - gen_scores grid_size subgames moves models location in - Array.sort (fun j i-> compare - heuristics.(i).(player) heuristics.(j).(player)) index; - (* {{{ log entry *) - if !debug_level > 2 && (depth0 > 2 || !debug_level > 4) && - (depth > 1 || !debug_level > 3) - then - printf ", best %d pre-heur: %F %!" player - heuristics.(index.(0)).(player); - (* }}} *) - Some heuristics - end else None in - let rec aux best i = - if i < n && not !timeout then ( - let pos = index.(i) in - let state = models.(pos) in - let sub_heur = - maximax_tree - (Aux.map_option (fun h->h.(pos)) heuristics) - player new_betas (depth-1) state in - (* note strong inequality: don't lose ordering info *) - if now_pruning && sub_heur.(player) > betas.(player) - then ( - (* {{{ log entry *) - if !debug_level > 2 && (depth0 > 2 || !debug_level > 4) && - (depth > 1 || !debug_level > 3) - then ( - printf ", best cut %d maximax: %F. %!" player - sub_heur.(player)); - (* }}} *) - sub_heur) - else if sub_heur.(player) > best.(player) - then aux sub_heur (i+1) - else aux best (i+1)) - else if !timeout then best - else ( - betas.(player) <- best.(player); - (* {{{ log entry *) - if !debug_level > 2 && (depth0 > 2 || !debug_level > 4) && - (depth > 1 || !debug_level > 3) - then ( - printf ", best %d maximax: %F. %!" player - best.(player)); - (* }}} *) - best) in - let alphas = Array.make num_players neg_infinity in - aux alphas 0 in - let betas = Array.make num_players infinity in - let player = loc.Arena.player in - let moves, models = - Move.gen_models rules state.Arena.struc state.Arena.time moves in - if models = [| |] then - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - loc.Arena.payoffs in - Aux.Right payoff - else - let cur_depth = ref 0 in - (* {{{ log entry *) - if !debug_level > 1 && (depth > 2 || !debug_level > 3) then ( - Printf.printf "\n\nIterative-deepening: depth %d\n%!" - (!cur_depth + 1) - ); - (* }}} *) - let scores = - Array.map (maximax_tree None player betas !cur_depth) models in - incr cur_depth; - while not !timeout && !cur_depth < depth do - (* {{{ log entry *) - if !debug_level > 1 && (depth > 2 || !debug_level > 3) then ( - Printf.printf "\n\nIterative-deepening: depth %d\n%!" - (!cur_depth + 1) - ); - (* }}} *) - let index = - Array.init (Array.length models) (fun i->i) in - Array.sort (fun j i-> compare - scores.(i).(player) scores.(j).(player)) index; - let betas = Array.make num_players infinity in - let new_scores = - Array.map (fun j -> - maximax_tree None player betas !cur_depth models.(j)) - index in - incr cur_depth; - if not !timeout then - Array.iteri (fun i j -> - (* inverting the permutation *) - scores.(j) <- new_scores.(i)) index; - (* {{{ log entry *) - if !debug_level > 1 && (depth > 2 || !debug_level > 3) then ( - Printf.printf "\nIterative-deepening: depth %d scores:\n%!" - !cur_depth; - Array.iteri (fun i score -> - Printf.printf "Structure:%s -- score %F\n" - (Structure.str models.(i).Arena.struc) score.(player)) scores - ); - (* }}} *) - done; - let _, best = - find_best_score ~use_det_setting:true cooperative player scores - (Array.map (fun _ -> 1) scores) in - let state = models.(best) in - (* {{{ log entry *) - if !debug_level > 0 && (depth > 1 || !debug_level > 3) - then printf " %d nodes, %d size, %f elapsed time\n%!" - !nodes_count !size_count - (Sys.time () -. !debug_playclock); - if !debug_level > 1 && (depth > 1 || !debug_level > 3) - then - Printf.printf "moving to state\n%s\n%!" - (Structure.str state.Arena.struc); - (* }}} *) - Aux.Left - (best, moves, memory, - {game_state=state; - memory=update_memory ~num_players state best memory}) - - | Tree_search (subgames, evgame_horizon, params, agents) -> - (* {{{ log entry *) - let debug_playclock = ref 0. in - if !debug_level > 1 then ( - debug_playclock := Sys.time (); - printf "\ntoss: tree search, timer started\n%!"); - (* }}} *) - (* the generated moves are wasted, but it's not much *) - let uctree = - match memory.(loc.Arena.player) with - | UCTree uctree -> uctree - | No_memory -> TEmpty - | _ -> failwith - "toss: tree search agent without game tree memory" in - (* {{{ log entry *) - if !debug_level > 2 then ( - print_endline "\ntoss: initial tree:"; - print_uctree (print_string) - params delta uctree; flush stdout); - if !debug_level > 3 then printf "toss: %d iters\n" - params.iters; - (* }}} *) - (* [grow_uctree] will check if it is not a terminal - position *) - let uctree = ref uctree and iteri = ref 0 in - (* the score update is already stored in the tree *) - while !iteri < params.iters && not !timeout do - incr iteri; - (* {{{ log entry *) - if !debug_level > 0 then printf "%d,%!" !iteri; - (* }}} *) - uctree := - snd (grow_uctree grid_size {play_def with agents=agents} - params subgames evgame_horizon - ~default_state:{game_state=state; memory=memory} !uctree) - done; - (* {{{ log entry *) - if !debug_level > 2 then ( - print_endline "\ntoss: updated tree:"; - print_uctree (print_string) - params delta !uctree; flush stdout); - if !debug_level > 1 then - printf "elapsed time: %f\n%!" - (Sys.time () -. !debug_playclock); - (* }}} *) - (match !uctree with - | Node node -> - let scores = Array.map (fun subtree -> - node_values params delta - (uctree_score ~num_players !uctree) - (uctree_heuristic subtree) - (uctree_score ~num_players subtree)) - node.node_subtrees in - let _, best = - find_best_score ~use_det_setting:true - params.cooperative loc.Arena.player - scores (Array.map uctree_size node.node_subtrees) in - let state = uctree_state node.node_subtrees.(best) in - (* {{{ log entry *) - if !debug_level > 1 then - Printf.printf "moving to state\n%s\n%!" - (Structure.str state.Arena.struc); - (* }}} *) - memory.(loc.Arena.player) <- (UCTree (Node node)); - Aux.Left - (best, moves, memory, - {game_state=state; - memory= - update_memory num_players state best memory}) - - | Terminal (game_state, score, heuristic, payoff) -> - Aux.Right payoff - - | _ -> failwith "toss: tree search -- unexpected end of tree") - | External callback -> - (* {{{ log entry *) - if !debug_level > 3 then printf "toss: external\n"; - (* }}} *) - let moves, models = - Move.gen_models rules state.Arena.struc state.Arena.time moves in - if models = [| |] then - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - loc.Arena.payoffs in - Aux.Right payoff - else - let descriptions = - Array.map (fun m -> Structure.str m.Arena.struc) models in - let best = callback descriptions in - let state = models.(best) in - Aux.Left - (best, moves, memory, - {game_state=state; - memory=update_memory num_players state best memory}) - - -(* Play a play, by applying {!toss}, till the end. Return the final - structure and its payoff. - - The [set_timer] should be only provided for standalone plays. For - suggestions, the timer is set by {!Server}. Tests use their own - timers too, see {!GameTest}. *) -and play ~grid_size ?set_timer ?horizon ?(plys=0) play_def state = - let () = match set_timer with - | None -> () - | Some timer -> - (* {{{ log entry *) - if !debug_level > 2 then printf "SET ALARM %d\n%!" timer; - (* }}} *) - ignore (Unix.alarm timer) in - let res = - toss ~grid_size - ~just_payoffs:(horizon <> None && plys >= Aux.unsome horizon) - play_def state in - let () = match set_timer with - | None -> () - | Some _ -> cancel_timeout () in - match res with - | Aux.Left (_,_,_,state) -> - (* {{{ log entry *) - if !debug_level > 5 || (!debug_level > 0 && set_timer <> None) then - printf "step-state:\n%s\n%!" - (Structure.str state.game_state.Arena.struc); - (* }}} *) - play ~grid_size ?set_timer ?horizon ~plys:(plys+1) play_def state - | Aux.Right payoff -> - (* {{{ log entry *) - if !debug_level > 5 || (!debug_level > 1 && set_timer <> None) then - printf "payoff-state:\n%a\n%!" - (Aux.array_fprint (fun f pv->fprintf f "%F" pv)) payoff; - (* }}} *) - state.game_state.Arena.struc, discount plys payoff - - -(* Walk up the tree selecting the optimal estimates route, and update - the estimates and heuristics ("maximax") on the way down. - - Currently, timeouts are not handled inside UCT iterations. *) -and grow_uctree grid_size - ({game={Arena.rules=rules; graph=graph; num_players=num_players}; - delta=delta} as play_def) params subgames - evgame_horizon ?default_state = - (* the state is only used for the empty tree case *) - function - | Node { - node_state=game_state; node_stats=score; - node_heuristic=heuristic; node_bestheur=old_bestheur; - node_endstate=endmodel; node_subtrees=subtrees - } -> - let player = graph.(game_state.Arena.cur_loc).Arena.player in - (* compute UCBs and update the best subtree *) - let ucb_scores = Array.map (fun subtree -> - let heuristic = uctree_heuristic subtree in - ucb1_tuned params delta player score - ~heuristic:(heuristic.(player)) - (uctree_score ~num_players subtree)) subtrees in - let best = Aux.array_argfind_all_max - (compare : float -> float -> int) ucb_scores in - (* no use of prioritizing cooperative/competitive in an - exploratory context *) - let best = List.nth best (Random.int (List.length best)) in - let upscore, subtree = - grow_uctree grid_size play_def params subgames evgame_horizon - subtrees.(best) in - subtrees.(best) <- subtree; - let score = add_score score upscore in - (* maximaxing -- update the heuristic if needed *) - let subtree_heur = uctree_heuristic subtree in - let heuristic, bestheur = - if subtree_heur.(player) > heuristic.(player) - then subtree_heur, best - else if best <> old_bestheur - then heuristic, old_bestheur - else if subtree_heur.(player) = heuristic.(player) - then subtree_heur, old_bestheur (* update for other players *) - else - let heuristics = Array.map uctree_heuristic subtrees in - let subt_sizes = Array.map (fun subt -> - (uctree_score ~num_players subt).score_obs) subtrees in - find_best_score params.cooperative - player heuristics subt_sizes in - upscore, - Node { - node_state=game_state; node_stats=score; - node_heuristic=heuristic; node_endstate=endmodel; - node_subtrees=subtrees; node_bestheur=bestheur; - } - - | Leaf (game_state, score, heuristic, endmodel) -> - let player = graph.(game_state.Arena.cur_loc).Arena.player in - expand_uctree grid_size play_def game_state ~score subgames - evgame_horizon params.heur_effect heuristic params.horizon - params.cooperative player - - | Tip (game_state, heuristic) -> - let player = graph.(game_state.Arena.cur_loc).Arena.player in - expand_uctree grid_size play_def game_state subgames evgame_horizon - params.heur_effect heuristic params.horizon params.cooperative - player - - | Terminal (game_state, score, heuristic, payoff) -> - let upscore = score_payoff payoff in - let score = add_score score upscore in - upscore, - Terminal (game_state, score, heuristic, payoff) - - | TEmpty -> - let play_state = Aux.unsome default_state in - let endmodel, payoff = - play ~grid_size ?horizon:params.horizon play_def play_state in - let upscore = score_payoff payoff in - upscore, - (* the heuristic value of the root can be ignored *) - Leaf (play_state.game_state, upscore, payoff, endmodel) - -(* Expand a leaf of the tree. *) -and expand_uctree grid_size ({game={Arena.rules=rules; graph=graph; - num_players=num_players; - defined_rels=defined_rels}; - delta=delta} as play_def) - state ?score subgames evgame_horizon heur_effect heuristic - horizon cooperative player = - let location = graph.(state.Arena.cur_loc) in - let moves = Move.gen_moves grid_size rules state.Arena.struc location in - if moves = [| |] then - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - location.Arena.payoffs in - let upscore = score_payoff payoff in - upscore, Terminal (state, upscore, heuristic, payoff) - - else - let moves, models = - Move.gen_models rules state.Arena.struc state.Arena.time moves in - if models = [| |] then - let payoff = - Array.map (fun expr -> - Solver.M.get_real_val expr state.Arena.struc) - location.Arena.payoffs in - let upscore = score_payoff payoff in - upscore, Terminal (state, upscore, heuristic, payoff) - else - let heuristics = - gen_scores grid_size subgames moves models location in - let subt_sizes = Array.map (fun _ -> 0) heuristics in - let heuristic, bestheur = - find_best_score cooperative player heuristics subt_sizes in - let scores = - Array.map (fun payoffs -> payoffs.(location.Arena.player)) - heuristics in - let subtrees = - Array.mapi (fun i state -> Tip (state, heuristics.(i))) - models in - let best = Aux.array_argfind_all_max - (compare : float -> float -> int) scores in - let best = List.nth best (Random.int (List.length best)) in - let next_state = models.(best) in - let empty_mem = Array.make num_players No_memory in - let state = - {game_state=next_state; memory=empty_mem} in - if heur_effect = Heuristic_only then - let upscore = score_payoff (Array.make num_players 0.) in - (* we maintain score to: (1) count the number of node visits, - (2) keep info when the search tree hits terminal nodes *) - let score = match score with - | None -> upscore - | Some score -> add_score score upscore in - subtrees.(best) <- - Leaf (next_state, upscore, heuristics.(best), next_state.Arena.struc); - (upscore, - Node { - node_state=next_state; node_stats=score; - node_heuristic=heuristic; node_endstate=next_state.Arena.struc; - node_subtrees=subtrees; node_bestheur=bestheur; - }) - else - let endmodel, payoff = play ~grid_size ?horizon play_def state in - let upscore = score_payoff payoff in - let score = match score with - | None -> upscore - | Some score -> add_score score upscore in - subtrees.(best) <- - Leaf (next_state, upscore, heuristics.(best), endmodel); - (upscore, - Node { - node_state=next_state; node_stats=score; - node_heuristic=heuristic; node_endstate=endmodel; - node_subtrees=subtrees; node_bestheur=bestheur; - }) - -let evgame_of_heuristic heuristics game = - let evgame gloc = - {ev_game = - {Arena.rules = []; - player_names = game.Arena.player_names; - defined_rels = game.Arena.defined_rels; - data = game.Arena.data; - graph = [| - {Arena.id=0; player=gloc.Arena.player; - payoffs=heuristics.(gloc.Arena.id); - moves=[]} |]; - num_players = game.Arena.num_players}; - ev_agents = [| |]; ev_delta = 0.0; ev_location = 0; - ev_horizon = Some 0; ev_memory = [| |]} in - Array.map evgame game.Arena.graph - -(* An UCT-based agent that uses either random playouts (when - [random_playout] is set to true) or the same location-dependent - heuristic for maximax search as given for the inside-tree - (including unevaluated tips) calculation. *) -let default_treesearch struc ~iters ?heuristic - ?advr ?(random_playout=false) - ?(playout_mm_depth=0) ?(heur_effect=default_params.heur_effect) - ?horizon game = - (* heuristics are location-id indexed first, then player-indexed *) - let heuristics = match heuristic with Some h -> h - | None -> - default_heuristic ~struc ?advr game in - let heur_evgame = - evgame_of_heuristic heuristics game in - let playout_agents = - if not (random_playout || heur_effect = Heuristic_only) then - Array.map (fun _ -> - Maximax_evgame - (heur_evgame, false, playout_mm_depth, true)) - game.Arena.graph - else Array.map (fun _ -> Random_move) game.Arena.graph in - Tree_search - (heur_evgame, Some 0, - {default_params with iters=iters; horizon=horizon; - heur_effect=heur_effect}, - playout_agents) - -(* Plain limited depth maximax tree search. *) -let default_maximax struc ~depth ?heuristic - ?advr ?(pruning=true) game = - let heuristics = match heuristic with Some h -> h - | None -> - default_heuristic ~struc ?advr game in - let heur_evgame = - evgame_of_heuristic heuristics game in - Maximax_evgame (heur_evgame, false, depth, pruning) - -let initialize_default state ?loc ?effort - ~search_method ?horizon - ?advr ?(payoffs_already_tnf=false) ?heuristic () = - let effort = match effort with - | None -> !default_effort | Some e -> e in - let {Arena.rules=rules; graph=graph; num_players=num_players} = fst state in - let struc = (snd state).Arena.struc in - (* {{{ log entry *) - if !debug_level > 0 then printf "\ninitializing game and play\n%!"; - (* }}} *) - (* TODO: default_heuristic redoes payoff normalization. *) - let game = fst state in - let agent = - match search_method with - | "maximax" -> - default_maximax struc ~depth:effort ?heuristic - ?advr ~pruning:false game - | "alpha_beta_ord" -> - default_maximax struc ~depth:effort ?heuristic - ?advr ~pruning:true game - | "uct_random_playouts" -> - default_treesearch struc - ~iters:effort ?heuristic ?advr ?horizon - ~random_playout:true game - | "uct_greedy_playouts" -> - default_treesearch struc - ~iters:effort ?heuristic ?advr ?horizon - ~random_playout:false game - | "uct_maximax_playouts" -> - default_treesearch struc - ~iters:effort ?heuristic ?advr ?horizon - ~random_playout:false ~playout_mm_depth:1 game - | "uct_no_playouts" -> - default_treesearch struc - ~iters:effort ?heuristic ?advr ?horizon - ~heur_effect:Heuristic_only game - | s -> failwith ("Game.initialize: unknown search method "^s) - in - let play = - {game = game; agents=Array.make num_players agent; - delta = 2.0} in (* FIXME: give/calc delta *) - (* {{{ log entry *) - if !debug_level > 2 then printf "play initialized\n%!"; - (* }}} *) - let init_state = initial_state ?loc play struc in - play, init_state - -let suggest ?effort play play_state = - let play = match effort with - | None -> play - | Some effort -> - {play with agents=Array.map - (function - | Tree_search (subgames, sth, params, agents) -> - Tree_search ( - subgames, sth, {params with iters=effort}, - agents) - | Maximax_evgame ( - subgames, cooperative, depth, use_pruning) -> - Maximax_evgame - (subgames, cooperative, effort, use_pruning) - | (Random_move | External _) as agent -> agent - ) play.agents} in - (* {{{ log entry *) - if !debug_level > 2 then printf "\nsuggest:\n%!"; - (* }}} *) - (match - toss ~grid_size:Move.cGRID_SIZE play play_state - with - | Aux.Left (bpos, moves, memory, _) -> - (* [suggest] does not update the state, rule application - should do it *) - (* {{{ log entry *) - if !debug_level > 1 then - printf "suggest: pos %d out of %d -- %s\n%!" bpos - (Array.length moves) - (Move.move_gs_str (play.game, play_state.game_state) moves.(bpos)); - (* }}} *) - Some (moves.(bpos), {play_state with memory=memory}) - | Aux.Right payoffs -> - (* {{{ log entry *) - if !debug_level > 2 then ( - Printf.printf "Suggest: found payoffs = %a\n%!" - (Aux.array_fprint (fun ppf -> Printf.fprintf ppf "%F")) payoffs - ); - (* }}} *) - None) - Deleted: trunk/Toss/Play/GameTest.ml =================================================================== --- trunk/Toss/Play/GameTest.ml 2011-04-05 20:44:16 UTC (rev 1406) +++ trunk/Toss/Play/GameTest.ml 2011-04-13 14:42:37 UTC (rev 1407) @@ -1,1063 +0,0 @@ -open OUnit -open Aux - -let assert_one_of str str_list = - let elements = String.concat ", " str_list in - assert_bool ("expected one of "^elements^", but got "^str) - (List.mem str str_list) - -let assert_not_one_of str str_list = - let elements = String.concat ", " str_list in - assert_bool ("expected disjoint from "^elements^", yet got "^str) - (not (List.mem str str_list)) - -let struc_of_str s = - StructureParser.parse_structure Lexer.lex (Lexing.from_string s) - -let formula_of_str s = - FormulaParser.parse_formula Lexer.lex (Lexing.from_string s) - -let real_expr_of_str s = - FormulaParser.parse_real_expr Lexer.lex (Lexing.from_string s) - -let defstruc_of_str s = - match - ArenaParser.parse_game_defs Lexer.lex (Lexing.from_string s) - with Arena.StateStruc struc -> struc - | _ -> failwith "defstruc_of_str: not a structure" - -let state_of_str s = - ArenaParser.parse_game_state Lexer.lex - (Lexing.from_string s) - -let state_of_file s = - let f = open_in s in - let res = - ArenaParser.parse_game_state Lexer.lex - (Lexing.from_channel f) in - res - -module StrMap = Structure.StringMap -module IntMap = Structure.IntMap - -let move_str r s m = Move.move_str_short s m -let move_gs_str = Move.move_gs_str_short - -let update_game ?(defs=false) - (lazy (horizon, adv_ratio, (state_game, state))) new_struc_s new_loc = - let new_struc = - if defs then defstruc_of_str new_struc_s else struc_of_str new_struc_s in - horizon, adv_ratio, - (state_game, {state with Arena.struc = new_struc; cur_loc = new_loc}) - -let get_loc_game ?update_struc - (lazy (horizon, adv_ratio, (state_game, state))) new_loc = - horizon, adv_ratio, - match update_struc with - | None -> - (state_game, {state with Arena.cur_loc = new_loc}) - | Some upd -> - (state_game, {state with - Arena.struc = upd state.Arena.struc; - cur_loc = new_loc}) - - -let rec binary_to_assoc = function - | [k;v]::tl -> (k,v)::(binary_to_assoc tl) - | [] -> [] - | _ -> failwith "binary_to_assoc: arity mismatch" - -let rec fix_find f x = - try fix_find f (f x) - with Not_found -> x - -module RelMap = Structure.StringMap -module Tuples = Structure.Tuples - -let winQxyz = - "ex x, y, z ((((Q(x) and Q(y)) and Q(z)) and ((((R(x, y) and R(y, z)) or (C(x, y) and C(y, z))) or ex u, v ((((R(x, v) and C(v, y)) and R(y, u)) and C(u, z)))) or ex u, v ((((R(x, v) and C(y, v)) and R(y, u)) and C(z, u))))))" -let winPxyz = - "ex x, y, z ((((P(x) and P(y)) and P(z)) and ((((R(x, y) and R(y, z)) or (C(x, y) and C(y, z))) or ex u, v ((((R(x, v) and C(v, y)) and R(y, u)) and C(u, z)))) or ex u, v ((((R(x, v) and C(y, v)) and R(y, u)) and C(z, u))))))" -let winPvwxyz = - "ex v, w, x, y, z ((((((P(v) and P(w)) and P(x)) and P(y)) and P(z)) - and ((((((R(v, w) and R(w, x)) and R(x, y)) and R(y, z)) or (((C(v, - w) and C(w, x)) and C(x, y)) and C(y, z))) or ex r, s, t, u - ((((((((R(v, r) and C(r, w)) and R(w, s)) and C(s, x)) and R(x, t)) - and C(t, y)) and R(y, u)) and C(u, z)))) or ex r, s, t, u - ((((((((R(v, r) and C(w, r)) and R(w, s)) and C(x, s)) and R(x, t)) - and C(y, t)) and R(y, u)) and C(z, u))))))" -let winQvwxyz = - "ex v, w, x, y, z ((((((Q(v) and Q(w)) and Q(x)) and Q(y)) and Q(z)) - and ((((((R(v, w) and R(w, x)) and R(x, y)) and R(y, z)) or (((C(v, - w) and C(w, x)) and C(x, y)) and C(y, z))) or ex r, s, t, u - ((((((((R(v, r) and C(r, w)) and R(w, s)) and C(s, x)) and R(x, t)) - and C(t, y)) and R(y, u)) and C(u, z)))) or ex r, s, t, u - ((((((((R(v, r) and C(w, r)) and R(w, s)) and C(x, s)) and R(x, t)) - and C(y, t)) and R(y, u)) and C(z, u))))))" - -let checkers_1x1_to_3x2 s = - let r = String.make (8*8*6+8*2) ' ' in - for i = 1 to 8*2 do - r.[i*8*3+i-1] <- '\n' done; - for i = 1 to 8 do - for j = 1 to 8 do - if (i+j) mod 2 = 0 then ( - String.blit "..." 0 r - ((8-j)*8*3*2 + (8-j)*2 + (i-1)*3) 3; - String.blit "..." 0 r - ((8-j)*8*3*2 + (8-j)*2 + 8*3 + 1 + (i-1)*3) 3); - if s.[(8-j)*8 + (8-j) + i-1] <> '.' then - r.[(8-j)*8*3*2 + (8-j)*2 + 8*3 + 1 + (i-1)*3] <- - s.[(8-j)*8 + (8-j) + i-1]; - done - done; - r - -let tictactoe_1x1_to_3x2 s = - let r = String.make (3*3*6+3*2) ' ' in - for i = 1 to 3*2 do - r.[i*3*3+i-1] <- '\n' done; - for i = 1 to 3 do - for j = 1 to 3 do - r.[(3-j)*3*3*2 + (3-j)*2 + 3*3 + 1 + (i-1)*3] <- - s.[(3-j)*3 + (3-j) + i-1] - done - done; - r - -let breakthrough_game = - lazy (None, 2.0, state_of_file "./examples/Breakthrough.toss") - -let breakthrough_simpl_game = - lazy (None, 2.0, state_of_file "./GGP/tests/breakthrough-simpl.toss") - -let tictactoe_game = - lazy (None, 5.0, state_of_file "./examples/Tic-Tac-Toe.toss") - -let gomoku8x8_game = - lazy (None, 5.0, state_of_file "./examples/Gomoku.toss") - -let gomoku19x19_game = - lazy (None, 5.0, state_of_file "./examples/Gomoku19x19.toss") - -let connect4_game = - lazy (None, 2.0, state_of_file "./examples/Connect4.toss") - -let chess_game = - lazy (Some 400, 2.0, state_of_file "./examples/Chess.toss") - -let checkers_game = - lazy (Some 400, 2.0, state_of_file "./examples/Checkers.toss") - -let breakthrough_heur_adv adv_ratio = - let expanded_win1 = - "ex y1, y2, y3, y4, y5, y6, y7, y8 (C(y1, y2) and C(y2, y3) and C(y3, y4) and C(y4, y5) and C(y5, y6) and C(y6, y7) and C(y7, y8) and W(y8))" in - let expanded_win2 = - "ex y1, y2, y3, y4, y5, y6, y7, y8 (B(y1) and C(y1, y2) and C(y2, y3) and C(y3, y4) and C(y4, y5) and C(y5, y6) and C(y6, y7) and C(y7, y8))" in - let expanded_payoff1 = - (Heuristic.of_payoff adv_ratio (strings_of_list ["B"; "W"]) - (real_expr_of_str - (":("^expanded_win1^") - :("^expanded_win2^")"))) in - let expanded_payoff2 = - (Heuristic.of_payoff adv_ratio (strings_of_list ["B"; "W"]) - (real_expr_of_str - (":("^expanded_win2^") - :("^expanded_win1^")"))) in - let expanded_payoffs = - [|expanded_payoff1; expanded_payoff2|] in - [|expanded_payoffs; expanded_payoffs|] - -let breakthrough_heur = - breakthrough_heur_adv 1.5 - -let chess_piece_value_heur = - let white_val = - "Sum (x | wP(x): 1) + Sum (x | wN(x): 3.2) + - Sum (x | wB(x): 3.33) + Sum (x | wR(x): 5.1) + Sum (x | wQ(x): 8.8) + - Sum (x | wK(x): 100)" in - let black_val = - "Sum (x | bP(x): 1) + Sum (x | bN(x): 3.2) + - Sum (x | bB(x): 3.33) + Sum (x | bR(x): 5.1) + Sum (x | bQ(x): 8.8) + - Sum (x | bK(x): 100)" in - let white_heur = - real_expr_of_str ("("^white_val^") - ("^black_val^")") in - let black_heur = - real_expr_of_str ("("^black_val^") - ("^white_val^")") in - let heuristic = [|white_heur; black_heur|] in - Array.make 32 heuristic - -let check_loc_random = function - | Game.Tree_search (_,_,_,evgames) -> - if - Aux.array_for_all (function Game.Random_move -> true | _ -> false) - evgames - then true - else if - Aux.array_for_all (function Game.Random_move -> false | _ -> true) - evgames - then false - else failwith "check_loc_random: inconsistent" - | _ -> failwith "check_loc_random: not a Tree_search" - -let payoff_str pay = - String.concat ", " - (List.map (fun (p,v)->p^": "^string_of_float v) pay) - -let try_n_times n (state_game, state) compute_move pred comment = - let hist = ref 0 in - let failed = ref [] in - for i = 1 to n do - let move, _ = compute_move () in - let move_str = move_gs_str state move in - if pred move_str - then incr hist - else failed := move_str :: !failed - done; - assert_bool - (Printf.sprintf "%s: only %d out of %d\nFailed moves: %s." - comment !hist n (String.concat "; " !failed)) - (float_of_int !hist >= float_of_int n *. 0.7) - -let compute_try search_method randomize effort timer_sec - (horizon, advr, state) loc msg pred = - if search_method = "GameTree" - then - let heur = Heuristic.default_heuristic - ~struc:(snd state).Arena.struc - ~advr (fst state) in - Play.set_timeout (float(timer_sec)); - let (move, _) = Play.maximax_unfold_choose effort - (fst state) (snd state) heur in - Play.cancel_timeout (); - let move_str = move_gs_str (snd state) move in - assert_bool - (Printf.sprintf "%s: Failed move: %s." msg move_str) - (pred move_str) - - else - let p,ps = Game.initialize_default - state ~advr ?horizon ~loc ~effort ~search_method () in - let compute_move () = - ignore (Unix.alarm timer_sec); - let res = - Aux.unsome (Game.suggest p ps) in - Game.cancel_timeout (); - res in - if randomize then - try_n_times 5 state compute_move pred msg - else - let move, _ = compute_move () in - let move_str = move_gs_str (snd state) move in - assert_bool - (Printf.sprintf "%s: Failed move: %s." msg move_str) - (pred move_str) - - - -let misc_tests = "misc" >::: [ - - "play: breakthrough suggest in game" >:: - (fun () -> - let horizon, advr, state = - update_game breakthrough_game -"[ | | ] \" - ... ... ... ... - ... ... ... ... -... ... ... ... -...B ... B.. ... - ... ... ... ... - B..B B..B B..B B.. -... ... ... ... -...B B..B B..B B..W - ... ... ... ... -W W..W W..B W..W ... -... ... ... ... -W..W W.. W.. ... [truncated message content] |