Author: Anthony Cozzie
Date: 14:14:20 05/05/04
I experimented with attack tables on the recommendation of Vincent last fall. The following code is based on his move generator. I eventually decided that on 32 bit machines, attack tables were a clear win over bitboards, but at 64 bits it would be about a break-even deal, and since I already had a bitboard engine, the choice was easy ;) anthony --------------------------------- #include "zappa.h" //Since Zappa spends a lot of time updating the attack tables, //I try to keep track of exactly what its doing. //#define ATTACK_TABLE_PROFILING #ifdef ATTACK_TABLE_PROFILING #define ATTACK_TABLE_PROFILE(A) A zappa_u64 at_sweep_calls = 0; zappa_u64 at_fixed_calls = 0; zappa_u64 at_xray_calls = 0; zappa_u64 at_sweep_squares = 0; zappa_u64 at_fixed_squares = 0; zappa_u64 at_xray_squares = 0; #else #define ATTACK_TABLE_PROFILE(A) #endif #define SCAN_FSM_STATE_NOX 0 #define SCAN_FSM_STATE_MX 1 #define SCAN_FSM_STATE_PX 2 #define SCAN_FSM_STATE_QX 3 #define SCAN_FSM_STATE_QPX 4 //Add a piece to the attack tables static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id, int sq); static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id, int sq); static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int id, int ptype, int sq); static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int id, int ptype, int sq); static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq); //Remove a piece from the attack tables static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int id, int sq); static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int id, int sq); static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p, int id, int ptype, int sq); static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p, int id, int ptype, int sq); static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq); //Update xray attacks - the really annoying part, in terms of complexity static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq, int xsq); static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq, int xsq); const zappa_s8 rot90[64] = {0, 8,16,24,32,40,48,56, 1, 9,17,25,33,41,49,57, 2,10,18,26,34,42,50,58, 3,11,19,27,35,43,51,59, 4,12,20,28,36,44,52,60, 5,13,21,29,37,45,53,61, 6,14,22,30,38,46,54,62, 7,15,23,31,39,47,55,63}; //Square A in rot-space is at square h1a8rot[A] in flat-space const zappa_s8 roth1a8[64] = {H1, G2, F3, E4, D5, C6, B7, A8, H2, G3, F4, E5, D6, C7, B8, A1, H3, G4, F5, E6, D7, C8, B1, A2, H4, G5, F6, E7, D8, C1, B2, A3, H5, G6, F7, E8, D1, C2, B3, A4, H6, G7, F8, E1, D2, C3, B4, A5, H7, G8, F1, E2, D3, C4, B5, A6, H8, G1, F2, E3, D4, C5, B6, A7}; //Square A in flatspace is at square roth1a8[A] in rot-space const zappa_s8 inv_roth1a8[64] = {H2, G3, F4, E5, D6, C7, B8, A1, H3, G4, F5, E6, D7, C8, B1, A2, H4, G5, F6, E7, D8, C1, B2, A3, H5, G6, F7, E8, D1, C2, B3, A4, H6, G7, F8, E1, D2, C3, B4, A5, H7, G8, F1, E2, D3, C4, B5, A6, H8, G1, F2, E3, D4, C5, B6, A7, H1, G2, F3, E4, D5, C6, B7, A8}; //Square A in rot-space is at square a1h8rot[A] in flat-space const zappa_s8 rota1h8[64] = {A1, B2, C3, D4, E5, F6, G7, H8, B1, C2, D3, E4, F5, G6, H7, A8, C1, D2, E3, F4, G5, H6, A7, B8, D1, E2, F3, G4, H5, A6, B7, C8, E1, F2, G3, H4, A5, B6, C7, D8, F1, G2, H3, A4, B5, C6, D7, E8, G1, H2, A3, B4, C5, D6, E7, F8, H1, A2, B3, C4, D5, E6, F7, G8}; //Square A in flatspace is at square roth1a8[A] in rot-space const zappa_s8 inv_rota1h8[64] = {A1, A2, A3, A4, A5, A6, A7, A8, B8, B1, B2, B3, B4, B5, B6, B7, C7, C8, C1, C2, C3, C4, C5, C6, D6, D7, D8, D1, D2, D3, D4, D5, E5, E6, E7, E8, E1, E2, E3, E4, F4, F5, F6, F7, F8, F1, F2, F3, G3, G4, G5, G6, G7, G8, G1, G2, H2, H3, H4, H5, H6, H7, H8, H1}; //Precomputed move masks for rotated bitboards bitboard rr_precomp[64][64]; //rook ranks: 8*64*64 = 32kB memory bitboard rf_precomp[64][64]; //rook files: 8*64*64 = 32kB memory bitboard ba_precomp[64][64]; //bishop a1->h8 diagonals: 8*64*64 = 32kB memory bitboard bh_precomp[64][64]; //bishop h1->a8 diagonals: 8*64*64 = 32kB memory //Precomputed move masks for kings, pawns, and knights bitboard k_precomp[64]; //king moves: 8*64 = 512B memory bitboard n_precomp[64]; //knight moves: 8*64 = 512B memory bitboard p_rightmask; //rightmask contains all the squares not on the A file bitboard p_leftmask; //leftmask contains all the squares not on the H file bitboard p_allmask; bitboard second_rank; //Squares on the second rank bitboard seventh_rank; //Squares on the seventh rank //given an array of 'delta x squares' and 'delta y squares' make a bitboard //with ones on those squares and zeroes everywhere else //also do not allow 'board wraps' //invariant: r is of size 8 //called by: fill_king_precomputes(), fill_knight_precomputes() static zforceinline bitboard get_precomp_move_mask(const int d_x[], const int d_y[], int sq) { register bitboard v, bb_one; register int i, x, y; for(i = 0, v = 0, bb_one = 1; i < 8; i++) { x = (sq & 7) + d_x[i]; y = ((sq & 56) >> 3) + d_y[i]; if((x >= 0) & (x <= 7) & (y >= 0) & (y <= 7)) v |= bb_one << (sq + d_x[i] + 8 * d_y[i]); } return v; } //initialize lookup tables void init_move_generation(void) { //constants for knight/king attack bitboards const static int d_nx[8] = {-2,-1, 1, 2, 2, 1,-1,-2}, d_ny[8] = {-1,-2,-2,-1, 1, 2, 2, 1}; const static int d_kx[8] = {-1, 0, 1, 1, 1, 0,-1,-1}, d_ky[8] = {-1,-1,-1, 0, 1, 1, 1, 0}; int i, j, r, k, rs_index, extfill, hoff, aoff, pcomp, mcomp, rcomp, qcomp, ptype, uptype, fsm_state, fsm_xstate; int fsm_stop_state, fsm_type_state, xpawn, ray_xtype; int barrier, xpiece, nfsm_xastate, nfsm_xrstate; int nfsm_type_state, nfsm_stop_state, xminor, g; int mgen_iter, sgen_iter, sq, x, y; //iterators const int directions[3][9] = {{7, -7, 9, -9, 0}, //slider delta types (rooks can move n*1, etc) {1, -1, 8, -8, 0}, {1, -1, 8, -8, 7, -7, 9, -9, 0}}; const int dx[2][8] = {{2, 1, -1, -2, -2, -1, 1, 2}, {1, 1, 0, -1, -1, -1, 0, 1}}; const int dy[2][8] = {{1, 2, 2, 1, -1, -2, -2, -1}, {0, 1, 1, 1, 0, -1, -1, -1}}; //Initialize edge table for(i = 0; i < 64; i++) edge_table[i] = (((i & 7) == 0) << 3) + (((i & 7) == 7) << 2) + (((i >> 3) == 7) << 1) +(((i >> 3) == 0) << 0); //xray piece lists for(i = 0; i < 8; i++) { g = uncompress_gradient[i]; for(j = 0; j < 64; j++) { k = 0; for(sq = j; (edge_table[sq] & scan_edge_table[g+9]) == 0; sq += g) xray_slider_move_table[i][j][k++] = (ray_type_table[i] << 6) + sq + g; xray_slider_move_table[i][j][int_max(k-1, 0)] |= 0xC0; //EOR } } //sliders for(i = 0; i < 3; i++) //loop over piece types { for(j = 0; j < 64; j++) //loop over initial squares { for(k = 0; k < 29; k++) //initialize (-1 -> end of list) mgen_slider_move_table[i][j][k] = -1; sgen_iter = mgen_iter = 0; for(k = 0; directions[i][k] != 0; k++) //loop over ray { for(sq = j; 1;) { //edge test: top if((sq >> 3) == 7 && directions[i][k] > 1) break; //edge test: bottom if((sq >> 3) == 0 && directions[i][k] < -1) break; //edge test: right if((sq & 7) == 7 && (directions[i][k] == -7 || directions[i][k] == 9 || directions[i][k] == 1)) break; //edge test: left if((sq & 7) == 0 && (directions[i][k] == 7 || directions[i][k] == -9 || directions[i][k] == -1)) break; sq += directions[i][k]; //proceed along the ray mgen_slider_move_table[i][j][mgen_iter++] = sq; //add to our list of squares } //0: xray White Pawns, diagonal sliders //1: xray Black Pawns, diagonal sliders //2: xray horizontal sliders //3: End of ray. if(abs(directions[i][k]) % 2 == 1 && abs(directions[i][k]) > 1) ray_xtype = directions[i][k] > 0; else ray_xtype = 2; //for all the square so far: if we terminate early, go to the next ray for(; sgen_iter < mgen_iter; sgen_iter++) mgen_slider_move_table[i][j][sgen_iter+32] = (((mgen_iter - sgen_iter - 1 == 0) ? 3 : ray_xtype) << 4) | (mgen_iter - sgen_iter - 1); } } } //fixed pieces for(i = 0; i < 2; i++) { for(j = 0; j < 64; j++) { for(k = 0; k < 16; k++) mgen_fixed_move_table[i][j][k] = -1; for(mgen_iter = k = 0; k < 8; k++) { x = File(j) + dx[i][k]; y = Rank(j) + dy[i][k]; if(x >= 0 && x <= 7 && y >= 0 && y <= 7) mgen_fixed_move_table[i][j][mgen_iter++] = (y << 3) | x; } } } //precompute the castling move for easy comparison / concise code wk_castle_move = m_build(0, 0, WKING, E1, G1); wq_castle_move = m_build(0, 0, WKING, E1, C1); bk_castle_move = m_build(0, 0, BKING, E8, G8); bq_castle_move = m_build(0, 0, BKING, E8, C8); //non xray state machine transistions and output for(i = 0; i < 320; i++) { uptype = (ptype = i & 0xF) >> 1; ray_xtype = (i >> 4) & 0x3; fsm_state = i >> 6; //are we done with this ray? switch(ray_xtype) { case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == ROOK); break; case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == ROOK); break; case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == BISHOP); break; case 3: barrier = 1; break; } barrier |= (fsm_state == SCAN_FSM_STATE_PX) || (fsm_state == SCAN_FSM_STATE_QPX); //skip decision if(barrier) at_scan_skip_decision[i] = 0x0F; //skip else at_scan_skip_decision[i] = 0x00; //continue //bc decision if(barrier) at_scan_bc_decision[i] = SCAN_FSM_STATE_NOX; else if(uptype == QUEEN) at_scan_bc_decision[i] = SCAN_FSM_STATE_QX; else if(uptype == PAWN && fsm_state < SCAN_FSM_STATE_QX) at_scan_bc_decision[i] = SCAN_FSM_STATE_PX; else if(uptype == PAWN && fsm_state >= SCAN_FSM_STATE_QX) at_scan_bc_decision[i] = SCAN_FSM_STATE_QPX; else if(ptype > 0 && fsm_state < SCAN_FSM_STATE_QX) at_scan_bc_decision[i] = SCAN_FSM_STATE_MX; else at_scan_bc_decision[i] = fsm_state; } //xray xsquare state machine transistion for(i = 0; i < 256; i++) { //uncompress uptype = (ptype = i & 0xF) >> 1; ray_xtype = (i >> 4) & 0x3; fsm_type_state = (i >> 6) & 0x1; //M/Q fsm_xstate = (i >> 7) & 0x1; xminor = (ray_xtype < 2 && uptype == BISHOP) || (ray_xtype >= 2 && uptype == ROOK); xpawn = (ray_xtype == 0 && ptype == BPAWN) || (ray_xtype == 1 && ptype == WPAWN); //First transistion: is the piece we are adding/subtracting a slider? if(xminor) nfsm_type_state = 3; //0 else if(fsm_type_state == 0 && uptype == QUEEN) nfsm_type_state = 2; //M-Q else if(fsm_type_state == 1 && uptype == QUEEN) nfsm_type_state = 3; //0 else if(fsm_type_state == 0 && xpawn) nfsm_type_state = 4; //M-P else if(fsm_type_state == 1 && xpawn) nfsm_type_state = 5; //Q-P else nfsm_type_state = fsm_type_state; //M/Q //stop state is always 0 nfsm_stop_state = 0; //xstate: insertion is simply "do we xray", without caring what is on the square nfsm_xastate = fsm_xstate; //Removal: we do count the piece on the xsquare nfsm_xrstate = !xpawn && !xminor && uptype != QUEEN; //build and store at_xray_add_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) | (nfsm_xastate); at_xray_rem_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) | (nfsm_xrstate); } //xray state machine transistions for(i = 0; i < 1536; i++) { //uncompress uptype = (ptype = i & 0xF) >> 1; ray_xtype = (i >> 4) & 0x3; fsm_xstate = (i >> 6) & 0x1; fsm_stop_state = (i >> 7) & 0x1; fsm_type_state = (i >> 8); //translate info into things that actually cause state changes //do we stop here? switch(ray_xtype) { case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == ROOK); break; case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == ROOK); break; case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING) || (uptype == BISHOP); break; case 3: barrier = 1; break; } barrier |= fsm_stop_state == 1; //also stop if we saw a pawn previously. //xray pawn? switch(ray_xtype) { case 0: xpawn = (ptype == BPAWN); break; case 1: xpawn = (ptype == WPAWN); break; case 2: case 3: xpawn = 0; break; } //will we scan through this piece xpiece = ptype != EMPTY && !barrier; //Input: 4 bits ptype, 2 bits decision. //FSM State: 1 bit xray, 1 bit stop_next, 6 states type_state //type state machine: on insertion +T, on removal -T //initial state M: We are a minor piece, and we attack this xsquare either directly or through another minor. // p->board[xsquare] was not a minor piece or a queen. //initial state Q: We are a minor piece xraying a queen, or a queen. // p->board[xsquare] was not a minor piece or a queen. //initial state: M-Q: We are a minor piece, and we attack xsquare directly or through another minor. // p->board[xsquare] was a queen. //initial state: 0: We were "cancelled out" by whatever we found at xsquare. For example, we are a minor // and xsquare is a minor (no change needed). //initial state M-P: We are a minor piece, attack square directly or through another minor, and p->board[xsquare] // was an xray pawn. //initial state Q-P: We are a minor xraying a queen, or a queen, and p->board[xsquare] was an xray pawn. //state transistion diagram // M + Q -> Q // M + * -> M // Q + * -> Q // M-Q + Q -> 0 // M-Q + * -> M-Q // 0 + * -> 0 // M-P + Q -> Q // M-P + * -> M // Q-P + * -> Q //stop state machine: (not really a state machine) handle pawn xray //state 0: Previous square was not an xray pawn //state 1: Previous square was not xsquare and an xray pawn //Insertion xray state machine //state 0: After removing the piece on xsquare, we will attack this square directly //state 1: After removing the piece on xsquare, we will attack this square indirectly //transistions: // 1 + * -> 1 // 0 + XP -> 1 // 0 + * -> 0 //Removal xray state machine //state 0: After inserting the piece on xsquare, we will attack this square indirectly //state 1: After inserting the piece on xsquare, we will not attack this square at all. // //Most of the work here is done before. The only trick is that on xraying a Pawn, we go from 0-1 //immediately afterward. Otherwise, there is no change. // 1 + * -> 1 // 0 + ts > 4 -> 1 // 0 + * -> 0 //stop state nfsm_stop_state = xpawn; //type state if(fsm_type_state == 0 && uptype == QUEEN) nfsm_type_state = 1; else if(fsm_type_state == 2 && uptype == QUEEN) nfsm_type_state = 3; else if(fsm_type_state == 4 && uptype == QUEEN) nfsm_type_state = 1; else if(fsm_type_state == 4 && uptype != QUEEN) nfsm_type_state = 0; else if(fsm_type_state == 5) nfsm_type_state = 1; else nfsm_type_state = fsm_type_state; //insertion xray state if(fsm_xstate == 0 && xpiece) nfsm_xastate = 1; else nfsm_xastate = fsm_xstate; //removal xray state if(fsm_type_state > 3) nfsm_xrstate = 1; else nfsm_xrstate = fsm_xstate; //compress at_xray_add_decision[i] = (barrier << 7) | (nfsm_type_state << 2) | (nfsm_stop_state << 1) | (nfsm_xastate); at_xray_rem_decision[i] = (barrier << 7) | (nfsm_type_state << 2) | (nfsm_stop_state << 1) | (nfsm_xrstate); } //fill gradient vector and ray information for(i = 0; i < 64; i++) { for(j = 0; j < 64; j++) //start by zeroing out everything grad_vector[i][j] = 0; bhp_ray(i) = bhm_ray(i) = bap_ray(i) = bam_ray(i) = (zappa_u64)0; rfp_ray(i) = rfm_ray(i) = rrp_ray(i) = rrm_ray(i) = (zappa_u64)0; for(j = i; (j & 7) != 0 && (j >> 3) != 7;) { //if we are not on the top/left of the board j += 7; //advance in the h1->a8 direction (+7) bhp_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = 7; //note our direction } for(j = i; (j & 7) != 7 && (j >> 3) != 0;) { //if we are not on the bottom/right of the board j -= 7; //advance in the a8->h1 direction (-7) bhm_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = -7; //note our direction } for(j = i; (j & 7) != 7 && (j >> 3) != 7;) { //if we are not on the top/right of the board j += 9; //advance in the a1->h8 direction (+9) bap_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = 9; //note our direction } for(j = i; (j & 7) != 0 && (j >> 3) != 0;) { //if we are not on the bottom/left of the board j -= 9; //advance in the h8->a1 direction (-9) bam_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = -9; //note our direction } for(j = i; (j >> 3) != 7;) { //if we are not on the top of the board j += 8; //advance up the file (+8) rfp_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = 8; //note our direction } for(j = i; (j >> 3) != 0;) { //if we are not on the bottom of the board j -= 8; //advance down the file (-8) rfm_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = -8; //note our direction } for(j = i; (j & 7) != 7;) { //if we are not on the right of the board j += 1; //advance right on the rank (+1) rrp_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = 1; //note our direction } for(j = i; (j & 7) != 0;) { //if we are not on the left of the board j -= 1; //advance left on the rank (-1) rrm_ray(i) |= (zappa_u64)1 << j; //add to ray grad_vector[i][j] = -1; //note our direction } b_rays(i) = bhp_ray(i) | bhm_ray(i) | bap_ray(i) | bam_ray(i); r_rays(i) = rrm_ray(i) | rrp_ray(i) | rfm_ray(i) | rfp_ray(i); } //rotated bitboard attack generation for(i = 0; i < 64; i++) //square loop { //Knights and Kings are easy k_precomp[i] = get_precomp_move_mask(d_kx, d_ky, i); n_precomp[i] = get_precomp_move_mask(d_nx, d_ny, i); for(j = 0; j < 64; j++) //combination loop { //init rf_precomp[i][j] = rr_precomp[i][j] = ba_precomp[i][j] = bh_precomp[i][j] = (zappa_u64)0; //rook file precomputes for(r = ((i & 56) >> 3) - 1; r >= 0; r--) { rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3))); if(((1 << r) & (j << 1)) != 0) break; } for(r = ((i & 56) >> 3) + 1 ; r < 8; r++) { rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3))); if(((1 << r) & (j << 1)) != 0) break; } //rook rank precomputes for(r = (i & 7) - 1; r >= 0; r--) { rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r)); if(((1 << r) & (j << 1)) != 0) break; } for(r = (i & 7) + 1 ; r < 8; r++) { rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r)); if(((1 << r) & (j << 1)) != 0) break; } extfill = (j << 1); //our /4 trick //h1a8 precomputes //rs_index = index in bit: shift offset - byte offset hoff = (i + Rank(i) - 7) & 7; rs_index = inv_roth1a8[i] - hoff*8; for(k = rs_index; 1;) { //1. Check for overflow out of byte if(k < 0 || k > 7) break; //break if we are about to go off the edge of the real board if(Rank(roth1a8[k + hoff*8]) == RANK8 || File(roth1a8[k + hoff*8]) == FILEA) break; //Add square to attack list k++; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8]; //break on non-empty square if((extfill >> k & 1)) break; } for(k = rs_index; 1;) { //1. Check for overflow out of byte if(k < 0 || k > 7) break; //break if we are about to go off the edge of the real board if(Rank(roth1a8[k + hoff*8]) == RANK1 || File(roth1a8[k + hoff*8]) == FILEH) break; //Add square to attack list k--; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8]; //break on non-empty square if((extfill >> k & 1)) break; } aoff = (i - Rank(i)) & 7; rs_index = inv_rota1h8[i] - aoff*8; for(k = rs_index; 1;) { //1. Check for overflow out of byte if(k < 0 || k > 7) break; //break if we are about to go off the edge of the real board if(Rank(rota1h8[k + aoff*8]) == RANK8 || File(rota1h8[k + aoff*8]) == FILEH) break; //Add square to attack list k++; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8]; //break on non-empty square if((extfill >> k & 1)) break; } for(k = rs_index; 1;) { //1. Check for overflow out of byte if(k < 0 || k > 7) break; //break if we are about to go off the edge of the real board if(Rank(rota1h8[k + aoff*8]) == RANK1 || File(rota1h8[k + aoff*8]) == FILEA) break; //Add square to attack list k--; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8]; //break on non-empty square if((extfill >> k & 1)) break; } } } //Fill in at_compress table for(i = 0; i < 1024; i++) { //Break down into attackers of each type pcomp = pawn_attackers(i); mcomp = knight_attackers(i) + bishop_attackers(i); rcomp = rook_attackers(i); qcomp = queen_attackers(i) + king_attackers(i); //Spill over into higher buckets && saturate if(pcomp > 1) { mcomp += pcomp - 1; pcomp = 1; } if(mcomp > 3) { rcomp += mcomp - 3; mcomp = 3; } if(rcomp > 1) { qcomp += rcomp - 1; rcomp = 1; } qcomp = int_min(qcomp, 3); at_compress_table[i] = pcomp + 4*mcomp + 8*rcomp + 16*qcomp; } p_rightmask = 0x7F7F7F7F7F7F7F7FULL; //rightmask contains all the squares not on the A file p_leftmask = 0xFEFEFEFEFEFEFEFEULL; //leftmask contains all the squares not on the H file p_allmask = 0x7E7E7E7E7E7E7E7EULL; //allmask contains all squares not on the A or H files second_rank = 0x000000000000FF00ULL; //All squares on the second rank seventh_rank = 0x00FF000000000000ULL; //All squares on the seventh rank } extern zappa_u8 is_horiz_slider[8]; extern zappa_u8 is_diag_slider[8]; void add_to_attack_table(zpos *p, int ptype, int id, int sq) { int i, a; switch(ptype & 0xF) { case BPAWN: add_to_attack_table_bpawn(p, id, sq); break; case WPAWN: add_to_attack_table_wpawn(p, id, sq); break; case BKNIGHT: case WKNIGHT: case BKING: case WKING: add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq); break; case BBISHOP: case BROOK: case BQUEEN: add_to_attack_table_black_sweep(p, id, ptype, sq); break; case WBISHOP: case WROOK: case WQUEEN: add_to_attack_table_white_sweep(p, id, ptype, sq); break; } for(i = 0; i < 2; i++) { for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) { id = lead_one_32(a); //shamelessly reusing a variable remove_from_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq, pl_square(p->pl[i][id])); } } } //non-incremental version void add_to_attack_table_noxray(zpos *p, int ptype, int id, int sq) { switch(ptype & 0xF) { case BPAWN: add_to_attack_table_bpawn(p, id, sq); break; case WPAWN: add_to_attack_table_wpawn(p, id, sq); break; case BKNIGHT: case WKNIGHT: case BKING: case WKING: add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq); break; case BBISHOP: case BROOK: case BQUEEN: add_to_attack_table_black_sweep(p, id, ptype, sq); break; case WBISHOP: case WROOK: case WQUEEN: add_to_attack_table_white_sweep(p, id, ptype, sq); break; default: break; } } void remove_from_attack_table(zpos *p, int sq) { int i, id, ptype; unsigned int a; ptype = p->board[sq]; id = p->pboard[sq]; switch(ptype & 0xF) { case BPAWN: remove_from_attack_table_bpawn(p, id, sq); break; case WPAWN: remove_from_attack_table_wpawn(p, id, sq); break; case BKNIGHT: case WKNIGHT: case BKING: case WKING: remove_from_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq); break; case BBISHOP: case BROOK: case BQUEEN: remove_from_attack_table_black_sweep(p, id, ptype, sq); break; case WBISHOP: case WROOK: case WQUEEN: remove_from_attack_table_white_sweep(p, id, ptype, sq); break; } for(i = 0; i < 2; i++) { for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) { id = lead_one_32(a); //shamelessly reusing a variable add_to_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq, pl_square(p->pl[i][id])); } } } static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq, int xsq) { int dsq, rindex, ip, bc, cs, i; zappa_u32 mt, xid[2], xtype[6]; zappa_u8 *move_table; ATTACK_TABLE_PROFILE(at_xray_calls++); //What type of ray are we? rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9]; //Check for early exit. For example, if we are a Bishop on B7 //and "xraying" H1, we clearly have no squares to update. 90% of the time not taken, //so it shouldn't hurt the branch predictor too much. if((scan_edge_table[dsq+9] & edge_table[sq]) != 0) return; ip = (ptype >= BQUEEN); //Check to see if we directly attack the square - this means //we have to scan to find out our type. if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0) { //Another early exit case: We attacked through a pawn, e.g. B P p -> //we don't need to do anything at all. if((p->board[sq - dsq] >> 1) == PAWN) return; //Prescan for(i = xsq+dsq; i != sq; i += dsq) ip |= (p->board[i] >= BQUEEN); ip |= 2; //We know we are xraying something, set xray bit. } //OK, we know we are going to have to do some dirty work. //Initialize conditions bc = at_xray_add_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) + p->board[sq]]; cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F; //initialize type mt = at_inc(ptype); xtype[0] = mt; xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN); xtype[3] = xtype[4] = xtype[5] = 0; //initialize ID xid[0] = (1 << (id + 16)) | (1 << id); xid[1] = 1 << (id + 16); //Do the actual update do { ATTACK_TABLE_PROFILE(at_xray_squares++); tbl[cs] += xtype[bc >> 2]; tbl[cs+64] |= xid[bc & 1]; bc = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs]; bc = at_xray_add_decision[bc]; move_table += 1; cs = *move_table & 0x3F; } while((bc & 0x80) == 0); } static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p, zappa_u32 *RESTRICT tbl, int id, int ptype, int sq, int xsq) { int dsq, rindex, ip, bc, cs, i; zappa_u32 mt, xid[2], xtype[6]; zappa_u8 *move_table; ATTACK_TABLE_PROFILE(at_xray_calls++); //What type of ray are we? rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9]; //Check for early exit. For example, if we are a Bishop on B7 //and "xraying" H1, we clearly have no squares to update. 90% of the time not taken, //so it shouldn't hurt the branch predictor too much. if((scan_edge_table[dsq+9] & edge_table[sq]) != 0) return; ip = (ptype >= BQUEEN); //Check to see if we directly attack the square - this means //we have to scan to find out our type. if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0) { //Another early exit case: We attacked through a pawn, e.g. B P p -> //we don't need to do anything at all. if((p->board[sq - dsq] >> 1) == PAWN) return; //Prescan for(i = xsq+dsq; i != sq; i += dsq) ip |= (p->board[i] >= BQUEEN); ip |= 2; //We know we are xraying something, set xray bit. } //OK, we know we are going to have to do some dirty work. //Initialize conditions bc = at_xray_rem_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) + p->board[sq]]; cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F; //initialize type mt = at_inc(ptype); xtype[0] = mt; xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN); xtype[3] = xtype[4] = xtype[5] = 0; //initialize ID xid[0] = ~(1 << id); //reset lower bit only xid[1] = ~((1 << (id + 16)) | (1 << id)); //reset both bits //Do the actual update do { ATTACK_TABLE_PROFILE(at_xray_squares++); tbl[cs] -= xtype[bc >> 2]; tbl[cs+64] &= xid[bc & 1]; bc = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs]; bc = at_xray_rem_decision[bc]; move_table += 1; cs = *move_table & 0x3F; } while((bc & 0x80) == 0); } // //Functions to add a piece to the attack tables // static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id, int sq) { int aid = ((1 << 16) + 1) << id; if(!issquare_tr(sq)) { p->at[1][sq+9] += at_inc(BPAWN); p->at[1][sq+9+64] += aid; } if(!issquare_tl(sq)) { p->at[1][sq+7] += at_inc(BPAWN); p->at[1][sq+7+64] += aid; } } static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id, int sq) { int aid = ((1 << 16) + 1) << id; if(!issquare_bl(sq)) { p->at[0][sq-9] += at_inc(BPAWN); p->at[0][sq-9+64] += aid; } if(!issquare_br(sq)) { p->at[0][sq-7] += at_inc(BPAWN); p->at[0][sq-7+64] += aid; } } static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int id, int ptype, int sq) { int target, cs, bc, pb, mt, _id, _xid; zappa_u32 xtype[5], xid[5]; zappa_s8 * RESTRICT move_table; ATTACK_TABLE_PROFILE(at_sweep_calls++); //Select which list of moves to use, initialize cs to first element and bc to zero. bc = 0; cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]); _id = (1 << id) | (1 << (id + 16)); _xid = 1 << (id + 16); //Build our table on the fly. This is actually pretty fast: no dependencies, high IPC, //and it merges with the prologue anyway. Athlon can do two 32 bit stores/cycle, P4 only one. xid[0] = _id; xid[1] = xid[2] = xid[3] = xid[4] = _xid; xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] = at_inc(BQUEEN); //1 iteration is 18 cycles on an Athlon XP do { ATTACK_TABLE_PROFILE(at_sweep_squares++); pb = p->board[cs]; mt = *(move_table+32); p->at[1][cs] += xtype[bc]; p->at[1][cs+64] += xid[bc]; bc = (bc << 6) + (mt & 0x30) + pb; target = mt & at_scan_skip_decision[bc]; move_table += 1; bc = at_scan_bc_decision[bc]; move_table += target; } while((cs = *move_table) >= 0); } static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int id, int ptype, int sq) { int target, cs, bc, pb, mt, _id, _xid; zappa_u32 xtype[5], xid[5]; zappa_s8 * RESTRICT move_table; ATTACK_TABLE_PROFILE(at_sweep_calls++); //Select which list of moves to use, initialize cs to first element and bc to zero. bc = 0; cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]); _id = (1 << id) | (1 << (id + 16)); _xid = 1 << (id + 16); //Build our table on the fly. This is actually pretty fast: no dependencies, high IPC, //and it merges with the prologue anyway. Athlon can do two 32 bit stores/cycle, P4 only one. xid[0] = _id; xid[1] = xid[2] = xid[3] = xid[4] = _xid; xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] = at_inc(BQUEEN); //1 iteration is 18 cycles on an Athlon XP do { ATTACK_TABLE_PROFILE(at_sweep_squares++); pb = p->board[cs]; mt = *(move_table+32); p->at[0][cs] += xtype[bc]; p->at[0][cs+64] += xid[bc]; bc = (bc << 6) + (mt & 0x30) + pb; target = mt & at_scan_skip_decision[bc]; move_table += 1; bc = at_scan_bc_decision[bc]; move_table += target; } while((cs = *move_table) >= 0); } //Sliders are annoying: fixed pieces are much more straightforward static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq) { int cs; zappa_u32 xid, type; zappa_s8 *move_table; ATTACK_TABLE_PROFILE(at_fixed_calls++); xid = (1 << id) | (1 << (id + 16)); type = at_inc(ptype); //initialization move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq]; cs = *move_table; do { ATTACK_TABLE_PROFILE(at_fixed_squares++); tbl[cs] += type; tbl[cs+64] += xid; move_table++; } while((cs = *move_table) >= 0); } // //Functions to remove a piece from the attack tables // static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int id, int sq) { int xid = ~((1 << (id + 16)) + (1 << id)); if(!issquare_bl(sq)) { p->at[0][sq-9] -= at_inc(BPAWN); p->at[0][sq-9+64] &= xid; } if(!issquare_br(sq)) { p->at[0][sq-7] -= at_inc(BPAWN); p->at[0][sq-7+64] &= xid; } } static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int id, int sq) { int xid = ~((1 << (id + 16)) + (1 << id)); if(!issquare_tr(sq)) { p->at[1][sq+9] -= at_inc(BPAWN); p->at[1][sq+9+64] &= xid; } if(!issquare_tl(sq)) { p->at[1][sq+7] -= at_inc(BPAWN); p->at[1][sq+7+64] &= xid; } } static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p, int id, int ptype, int sq) { int target, cs, bc, pb, mt, _id, _xid; zappa_u32 xtype[5], xid[5]; zappa_s8 * RESTRICT move_table; ATTACK_TABLE_PROFILE(at_sweep_calls++); //Select which list of moves to use, initialize cs to first element and bc to zero. bc = 0; cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]); _id = (1 << id) | (1 << (id + 16)); _xid = 1 << (id + 16); //Build our table on the fly. This is actually pretty fast: no dependencies, high IPC, //and it merges with the prologue anyway. Athlon can do two 32 bit stores/cycle, P4 only one. xid[0] = _id; xid[1] = xid[2] = xid[3] = xid[4] = _xid; xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] = at_inc(BQUEEN); //1 iteration is 18 cycles on an Athlon XP do { ATTACK_TABLE_PROFILE(at_sweep_squares++); pb = p->board[cs]; mt = *(move_table+32); p->at[1][cs] -= xtype[bc]; p->at[1][cs+64] -= xid[bc]; bc = (bc << 6) + (mt & 0x30) + pb; target = mt & at_scan_skip_decision[bc]; move_table += 1; bc = at_scan_bc_decision[bc]; move_table += target; } while((cs = *move_table) >= 0); } static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p, int id, int ptype, int sq) { int target, cs, bc, pb, mt, _id, _xid; zappa_u32 xtype[5], xid[5]; zappa_s8 * RESTRICT move_table; ATTACK_TABLE_PROFILE(at_sweep_calls++); //Select which list of moves to use, initialize cs to first element and bc to zero. bc = 0; cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]); _id = (1 << id) | (1 << (id + 16)); _xid = 1 << (id + 16); //Build our table on the fly. This is actually pretty fast: no dependencies, high IPC, //and it merges with the prologue anyway. Athlon can do two 32 bit stores/cycle, P4 only one. xid[0] = _id; xid[1] = xid[2] = xid[3] = xid[4] = _xid; xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] = at_inc(BQUEEN); //1 iteration is 18 cycles on an Athlon XP do { ATTACK_TABLE_PROFILE(at_sweep_squares++); pb = p->board[cs]; mt = *(move_table+32); p->at[0][cs] -= xtype[bc]; p->at[0][cs+64] -= xid[bc]; bc = (bc << 6) + (mt & 0x30) + pb; target = mt & at_scan_skip_decision[bc]; move_table += 1; bc = at_scan_bc_decision[bc]; move_table += target; } while((cs = *move_table) >= 0); } //Sliders are annoying: fixed pieces are much more straightforward static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p, zappa_u32 * RESTRICT tbl, int id, int ptype, int sq) { int cs; zappa_u32 xid, type; zappa_s8 *move_table; ATTACK_TABLE_PROFILE(at_fixed_calls++); xid = (1 << id) | (1 << (id + 16)); type = at_inc(ptype); //initialization move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq]; cs = *move_table; do { ATTACK_TABLE_PROFILE(at_fixed_squares++); tbl[cs] -= type; tbl[cs+64] -= xid; move_table++; } while((cs = *move_table) >= 0); } void print_attack_table_profile_info(void) { #ifdef ATTACK_TABLE_PROFILING printf("Sweep square average: %f\n", (float)((double)(at_sweep_squares) / (double)(at_sweep_calls))); printf("Fixed square average: %f\n", (float)((double)(at_fixed_squares) / (double)(at_fixed_calls))); printf("Xray square average: %f\n", (float)((double)(at_xray_squares) / (double)(at_xray_calls))); printf("Sweep Calls: %d\n", (int)at_sweep_calls); printf("Fixed Calls: %d\n", (int)at_fixed_calls); printf("Xray Calls: %d\n", (int)at_xray_calls); #endif }
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.