-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrust_pgn_reader_python_binding.pyi
More file actions
384 lines (311 loc) · 11 KB
/
rust_pgn_reader_python_binding.pyi
File metadata and controls
384 lines (311 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
from typing import List, Optional, Tuple, Dict, Iterator, Union, overload
import pyarrow
import numpy as np
import numpy.typing as npt
from numpy.typing import NDArray
class PyGameView:
    """Zero-copy view into a single game's data within a ParsedGames result.

    Board indexing note: Boards use square indexing (a1=0, h8=63).
    To convert to rank/file:
        rank = square // 8
        file = square % 8
    """

    def __len__(self) -> int:
        """Number of moves in this game."""
        ...

    @property
    def num_positions(self) -> int:
        """Number of positions recorded for this game."""
        ...

    # === Board state views ===

    @property
    def boards(self) -> NDArray[np.uint8]:
        """Board positions, shape (num_positions, 8, 8)."""
        ...

    @property
    def initial_board(self) -> NDArray[np.uint8]:
        """Initial board position, shape (8, 8)."""
        ...

    @property
    def final_board(self) -> NDArray[np.uint8]:
        """Final board position, shape (8, 8)."""
        ...

    @property
    def castling(self) -> NDArray[np.bool_]:
        """Castling rights [K,Q,k,q], shape (num_positions, 4)."""
        ...

    @property
    def en_passant(self) -> NDArray[np.int8]:
        """En passant file (-1 if none), shape (num_positions,)."""
        ...

    @property
    def halfmove_clock(self) -> NDArray[np.uint8]:
        """Halfmove clock, shape (num_positions,)."""
        ...

    @property
    def turn(self) -> NDArray[np.bool_]:
        """Side to move (True=white), shape (num_positions,)."""
        ...

    # === Move views ===

    @property
    def from_squares(self) -> NDArray[np.uint8]:
        """From squares, shape (num_moves,)."""
        ...

    @property
    def to_squares(self) -> NDArray[np.uint8]:
        """To squares, shape (num_moves,)."""
        ...

    @property
    def promotions(self) -> NDArray[np.int8]:
        """Promotions (-1=none, 2=N, 3=B, 4=R, 5=Q), shape (num_moves,)."""
        ...

    @property
    def clocks(self) -> NDArray[np.float32]:
        """Clock times in seconds (NaN if missing), shape (num_moves,)."""
        ...

    @property
    def evals(self) -> NDArray[np.float32]:
        """Engine evals (NaN if missing), shape (num_moves,)."""
        ...

    # === Per-game metadata ===

    @property
    def headers(self) -> Dict[str, str]:
        """Raw PGN headers as dict."""
        ...

    @property
    def outcome(self) -> Optional[str]:
        """Game outcome from movetext: 'White', 'Black', 'Draw', 'Unknown', or None."""
        ...

    @property
    def is_checkmate(self) -> bool:
        """Final position is checkmate."""
        ...

    @property
    def is_stalemate(self) -> bool:
        """Final position is stalemate."""
        ...

    @property
    def is_insufficient(self) -> Tuple[bool, bool]:
        """Insufficient material (white, black)."""
        ...

    @property
    def legal_move_count(self) -> int:
        """Legal move count in final position."""
        ...

    @property
    def is_valid(self) -> bool:
        """Whether game parsed successfully."""
        ...

    @property
    def is_game_over(self) -> bool:
        """Whether the game is over (checkmate, stalemate, or both sides insufficient)."""
        ...

    @property
    def comments(self) -> List[Optional[str]]:
        """Raw text comments per move (only populated when store_comments=True)."""
        ...

    @property
    def legal_moves(self) -> List[List[Tuple[int, int, int]]]:
        """Legal moves at each position (only populated when store_legal_moves=True).

        Each entry is a list of (from_square, to_square, promotion) tuples.
        """
        ...

    # === Convenience methods ===

    def move_uci(self, move_idx: int) -> str:
        """Get UCI string for move at index."""
        ...

    def moves_uci(self) -> List[str]:
        """Get all moves as UCI strings."""
        ...

    def __repr__(self) -> str: ...
class ParsedGamesIter:
    """Iterator over games in a ParsedGames result."""

    def __iter__(self) -> "ParsedGamesIter":
        """Return the iterator itself, per the iterator protocol."""
        ...

    def __next__(self) -> PyGameView:
        """Return the next game view."""
        ...
class PyChunkView:
    """View into a single chunk's raw numpy arrays.

    Access via ``parsed_games.chunks[i]``. Each chunk corresponds to one
    parsing thread's output. Use this for advanced access patterns like
    manual concatenation or custom batching.
    """

    # NOTE(review): most properties below carry no documentation in the
    # original stub. Their names match PyGameView's per-game properties, so
    # they presumably hold the same data for every game in the chunk --
    # confirm shapes and conventions against the Rust binding.

    @property
    def num_games(self) -> int: ...

    @property
    def num_moves(self) -> int: ...

    @property
    def num_positions(self) -> int: ...

    @property
    def boards(self) -> NDArray[np.uint8]:
        """Board positions, shape (N_positions, 8, 8), dtype uint8."""
        ...

    @property
    def castling(self) -> NDArray[np.bool_]:
        """Castling rights [K,Q,k,q], shape (N_positions, 4), dtype bool."""
        ...

    @property
    def en_passant(self) -> NDArray[np.int8]: ...

    @property
    def halfmove_clock(self) -> NDArray[np.uint8]: ...

    @property
    def turn(self) -> NDArray[np.bool_]: ...

    @property
    def from_squares(self) -> NDArray[np.uint8]: ...

    @property
    def to_squares(self) -> NDArray[np.uint8]: ...

    @property
    def promotions(self) -> NDArray[np.int8]: ...

    @property
    def clocks(self) -> NDArray[np.float32]: ...

    @property
    def evals(self) -> NDArray[np.float32]: ...

    # NOTE(review): presumably offsets delimiting each game's slice of the
    # flat move / position arrays -- confirm length and end-point convention.
    @property
    def move_offsets(self) -> NDArray[np.uint32]: ...

    @property
    def position_offsets(self) -> NDArray[np.uint32]: ...

    @property
    def is_checkmate(self) -> NDArray[np.bool_]: ...

    @property
    def is_stalemate(self) -> NDArray[np.bool_]: ...

    @property
    def is_insufficient(self) -> NDArray[np.bool_]: ...

    @property
    def legal_move_count(self) -> NDArray[np.uint16]: ...

    @property
    def valid(self) -> NDArray[np.bool_]: ...

    @property
    def headers(self) -> List[Dict[str, str]]: ...

    @property
    def outcome(self) -> List[Optional[str]]: ...

    @property
    def comments(self) -> List[Optional[str]]: ...

    # NOTE(review): the legal_move_* arrays look like a flattened form of
    # PyGameView.legal_moves, indexed through legal_move_offsets -- confirm.
    @property
    def legal_move_from_squares(self) -> NDArray[np.uint8]: ...

    @property
    def legal_move_to_squares(self) -> NDArray[np.uint8]: ...

    @property
    def legal_move_promotions(self) -> NDArray[np.int8]: ...

    @property
    def legal_move_offsets(self) -> NDArray[np.uint32]: ...

    def __repr__(self) -> str: ...
class ParsedGames:
    """Chunked container for parsed chess games, optimized for ML training.

    Internally stores data in multiple chunks (one per parsing thread) to
    avoid the cost of merging. Per-game access is O(log(num_chunks)) via
    binary search on precomputed boundaries.

    Board layout:
        Boards use square indexing: a1=0, b1=1, ..., h8=63
        Piece encoding: 0=empty, 1-6=white PNBRQK, 7-12=black pnbrqk
    """

    # === Computed properties ===

    @property
    def num_games(self) -> int:
        """Number of games in the result."""
        ...

    @property
    def num_moves(self) -> int:
        """Total number of moves across all games."""
        ...

    @property
    def num_positions(self) -> int:
        """Total number of board positions recorded."""
        ...

    @property
    def num_chunks(self) -> int:
        """Number of internal chunks."""
        ...

    # === Escape hatch: raw chunk access ===

    @property
    def chunks(self) -> List[PyChunkView]:
        """Access raw per-chunk data.

        Each chunk corresponds to one parsing thread's output. Use this
        for advanced access patterns like manual concatenation.
        """
        ...

    # === Sequence protocol ===

    def __len__(self) -> int:
        """Number of games in the result."""
        ...

    # Stub files declare only the @overload variants; a trailing
    # non-overload implementation is rejected by type checkers in .pyi files.
    @overload
    def __getitem__(self, idx: int) -> PyGameView:
        """Access a single game by index."""
        ...

    @overload
    def __getitem__(self, idx: slice) -> List[PyGameView]:
        """Access several games by slice."""
        ...

    def __iter__(self) -> ParsedGamesIter:
        """Iterate over all games."""
        ...

    # === Mapping utilities ===

    def position_to_game(self, position_indices: npt.ArrayLike) -> NDArray[np.int64]:
        """Map global position indices to game indices.

        Useful after shuffling/sampling positions to look up game metadata.

        Args:
            position_indices: Array of indices into the global position space.
                Accepts any integer dtype; int64 is optimal (avoids conversion).

        Returns:
            Array of game indices (same shape as input)
        """
        ...

    def move_to_game(self, move_indices: npt.ArrayLike) -> NDArray[np.int64]:
        """Map global move indices to game indices.

        Args:
            move_indices: Array of indices into the global move space.
                Accepts any integer dtype; int64 is optimal (avoids conversion).

        Returns:
            Array of game indices (same shape as input)
        """
        ...
def parse_game(
    pgn: str,
    store_comments: bool = False,
    store_legal_moves: bool = False,
) -> ParsedGames:
    """Parse a single PGN game string.

    Convenience wrapper for parsing a single game. Returns a ParsedGames
    container with one game.

    Args:
        pgn: PGN game string
        store_comments: Whether to store raw text comments (default: False)
        store_legal_moves: Whether to store legal moves at each position (default: False)

    Returns:
        ParsedGames object containing the parsed game
    """
    ...
def parse_games(
    pgn_chunked_array: pyarrow.ChunkedArray,
    num_threads: Optional[int] = None,
    chunk_multiplier: Optional[int] = None,
    store_comments: bool = False,
    store_legal_moves: bool = False,
) -> ParsedGames:
    """Parse chess games from a PyArrow ChunkedArray into flat NumPy arrays.

    This API is optimized for ML training pipelines, returning flat NumPy arrays
    that can be efficiently batched and processed.

    Args:
        pgn_chunked_array: PyArrow ChunkedArray containing PGN strings
        num_threads: Number of threads for parallel parsing (default: all CPUs)
        chunk_multiplier: Multiplier for number of chunks (default: 1)
        store_comments: Whether to store raw text comments (default: False)
        store_legal_moves: Whether to store legal moves at each position (default: False)

    Returns:
        ParsedGames object containing flat arrays and iteration support
    """
    ...
def parse_games_from_strings(
    pgns: List[str],
    num_threads: Optional[int] = None,
    store_comments: bool = False,
    store_legal_moves: bool = False,
) -> ParsedGames:
    """Parse multiple PGN game strings in parallel.

    Convenience wrapper for when you have a list of strings rather than an Arrow array.

    Args:
        pgns: List of PGN game strings
        num_threads: Number of threads for parallel parsing (default: all CPUs)
        store_comments: Whether to store raw text comments (default: False)
        store_legal_moves: Whether to store legal moves at each position (default: False)

    Returns:
        ParsedGames object containing flat arrays and iteration support
    """
    ...