
- Optimize 'find next moves' by passing the move list to the next iteration and
  removing only the move that was just used, instead of regenerating the list.
  
- Optimize field data into single dimension array

- Patterns that were chosen by decision trees should be looked up from each position of the board
- NOTE: also check mirrored orientations of each pattern

- Gain calculation: subsetEntropy — can be optimized into a single filtering pass instead of one filtering pass per distinct attribute value
  

http://pressibus.org/ataxx/autre/minimax/node2.html

minimax(in game board, in int depth, in int max_depth,
        out score chosen_score, out score chosen_move)
begin 
    if (depth = max_depth) then
        chosen_score = evaluation(board);
    else 
        moves_list = generate_moves(board);
        if (moves_list = NULL) then
            chosen_score = evaluation(board);
        else 
            best_score = infinity;   /* must be initialized BEFORE the loop,
                                        otherwise it is reset every iteration
                                        and only the last move is ever kept */
            for (i = 1 to moves_list.length) do
                new_board = board;
                apply_move(new_board, moves_list[i]);
                minimax(new_board, depth+1, max_depth, the_score, the_move);
                if (better(the_score, best_score)) then
                    best_score = the_score;
                    best_move = the_move;
                endif
            enddo
            chosen_score = best_score;
            chosen_move = best_move;
        endif
    endif
end.

 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 

http://www.cis.temple.edu/~giorgio/cis587/readings/id3-c45.html 
    function ID3 (R: a set of non-categorical attributes,
		 C: the categorical attribute,
		 S: a training set) returns a decision tree;
   begin
	If S is empty, return a single node with value Failure;
	If S consists of records all with the same value for 
	   the categorical attribute, 
	   return a single node with that value;
	If R is empty, then return a single node with as value
	   the most frequent of the values of the categorical attribute
	   that are found in records of S; [note that then there
	   will be errors, that is, records that will be improperly
	   classified];
	Let D be the attribute with largest Gain(D,S) 
	   among attributes in R;
	Let {dj| j=1,2, .., m} be the values of attribute D;
	Let {Sj| j=1,2, .., m} be the subsets of S consisting 
	   respectively of records with value dj for attribute D;
	Return a tree with root labeled D and arcs labeled 
	   d1, d2, .., dm going respectively to the trees 

	     ID3(R-{D}, C, S1), ID3(R-{D}, C, S2), .., ID3(R-{D}, C, Sm);
   end ID3;