From 019528dc3d85c3e2534b1fce914b15e8eb9991dd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Mar 2026 13:41:45 -0700 Subject: [PATCH] used claude with claude markdown docs --- .../core/constants/concrete_types.py | 1 + synapseclient/models/curation.py | 223 ++++++++++++++++++ .../models/mixins/asynchronous_job.py | 2 + .../models/async/test_grid_async.py | 87 +++++++ .../models/async/unit_test_curation_async.py | 155 ++++++++++++ 5 files changed, 468 insertions(+) diff --git a/synapseclient/core/constants/concrete_types.py b/synapseclient/core/constants/concrete_types.py index fba11dbdb..a17dfeaf0 100644 --- a/synapseclient/core/constants/concrete_types.py +++ b/synapseclient/core/constants/concrete_types.py @@ -122,6 +122,7 @@ GRID_RECORD_SET_EXPORT_REQUEST = ( "org.sagebionetworks.repo.model.grid.GridRecordSetExportRequest" ) +GRID_CSV_IMPORT_REQUEST = "org.sagebionetworks.repo.model.grid.GridCsvImportRequest" LIST_GRID_SESSIONS_REQUEST = ( "org.sagebionetworks.repo.model.grid.ListGridSessionsRequest" ) diff --git a/synapseclient/models/curation.py b/synapseclient/models/curation.py index 89107c1d2..fd9142dca 100644 --- a/synapseclient/models/curation.py +++ b/synapseclient/models/curation.py @@ -28,11 +28,13 @@ from synapseclient.core.constants.concrete_types import ( CREATE_GRID_REQUEST, FILE_BASED_METADATA_TASK_PROPERTIES, + GRID_CSV_IMPORT_REQUEST, GRID_RECORD_SET_EXPORT_REQUEST, LIST_GRID_SESSIONS_REQUEST, LIST_GRID_SESSIONS_RESPONSE, RECORD_BASED_METADATA_TASK_PROPERTIES, ) +from synapseclient.core.upload.multipart_upload_async import multipart_upload_file_async from synapseclient.core.utils import delete_none_keys, merge_dataclass_entities from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator from synapseclient.models.recordset import ValidationSummary @@ -1078,6 +1080,58 @@ def to_synapse_request(self) -> Dict[str, Any]: return request_dict +@dataclass +class GridCsvImportRequest(AsynchronousCommunicator): + """ + A request to import a CSV file into an active grid session. + + Represents a [Synapse GridCsvImportRequest](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/grid/GridCsvImportRequest.html). + + Attributes: + concrete_type: The concrete type for the request + session_id: The grid session ID to import the CSV into + file_handle_id: The file handle ID of the CSV to import + """ + + concrete_type: str = GRID_CSV_IMPORT_REQUEST + """The concrete type for the request""" + + session_id: Optional[str] = None + """The grid session ID to import the CSV into""" + + file_handle_id: Optional[str] = None + """The file handle ID of the CSV to import""" + + def fill_from_dict( + self, synapse_response: Union[Dict[str, Any], Any] + ) -> "GridCsvImportRequest": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The GridCsvImportRequest object. + """ + self.session_id = synapse_response.get("sessionId", None) + return self + + def to_synapse_request(self) -> Dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict = {"concreteType": self.concrete_type} + if self.session_id is not None: + request_dict["sessionId"] = self.session_id + if self.file_handle_id is not None: + request_dict["fileHandleId"] = self.file_handle_id + return request_dict + + @dataclass class GridSession: """ @@ -1373,6 +1427,70 @@ def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: """ return None + def import_csv( + self, + file_handle_id: Optional[str] = None, + path: Optional[str] = None, + *, + timeout: int = 120, + synapse_client: Optional[Synapse] = None, + ) -> "Grid": + """ + Import a CSV file into the active grid session. + + Either `file_handle_id` or `path` must be provided. If `path` is provided, + the file will be uploaded via multipart upload and the resulting file handle + ID will be used. + + Arguments: + file_handle_id: The file handle ID of the CSV to import. + Mutually exclusive with `path`. + path: Local path to a CSV file to upload and import. + Mutually exclusive with `file_handle_id`. + timeout: The number of seconds to wait for the job to complete or + progress before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + Grid: This Grid instance. + + Raises: + ValueError: If `session_id` is not set. + ValueError: If neither `file_handle_id` nor `path` is provided. + ValueError: If both `file_handle_id` and `path` are provided. + + Example: Import a CSV by file handle ID +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + grid = Grid(session_id="abc-123-def") + grid = grid.import_csv(file_handle_id="123456") + ``` + + Example: Import a CSV from a local path +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + grid = Grid(session_id="abc-123-def") + grid = grid.import_csv(path="/path/to/data.csv") + ``` + """ + return self + @classmethod def list( cls, @@ -1838,3 +1956,108 @@ async def main(): await delete_grid_session( session_id=self.session_id, synapse_client=synapse_client ) + + async def import_csv_async( + self, + file_handle_id: Optional[str] = None, + path: Optional[str] = None, + *, + timeout: int = 120, + synapse_client: Optional[Synapse] = None, + ) -> "Grid": + """ + Import a CSV file into the active grid session. + + Either `file_handle_id` or `path` must be provided. If `path` is provided, + the file will be uploaded via multipart upload and the resulting file handle + ID will be used. + + Arguments: + file_handle_id: The file handle ID of the CSV to import. + Mutually exclusive with `path`. + path: Local path to a CSV file to upload and import. + Mutually exclusive with `file_handle_id`. + timeout: The number of seconds to wait for the job to complete or + progress before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + Grid: This Grid instance. + + Raises: + ValueError: If `session_id` is not set. + ValueError: If neither `file_handle_id` nor `path` is provided. + ValueError: If both `file_handle_id` and `path` are provided. + + Example: Import a CSV by file handle ID asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = Grid(session_id="abc-123-def") + grid = await grid.import_csv_async(file_handle_id="123456") + + asyncio.run(main()) + ``` + + Example: Import a CSV from a local path asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = Grid(session_id="abc-123-def") + grid = await grid.import_csv_async(path="/path/to/data.csv") + + asyncio.run(main()) + ``` + """ + if not self.session_id: + raise ValueError("session_id is required to import a CSV into a Grid") + if file_handle_id is None and path is None: + raise ValueError( + "Either file_handle_id or path must be provided to import a CSV" + ) + if file_handle_id is not None and path is not None: + raise ValueError( + "Only one of file_handle_id or path may be provided, not both" + ) + + trace.get_current_span().set_attributes( + { + "synapse.session_id": self.session_id or "", + } + ) + + if path is not None: + client = Synapse.get_client(synapse_client=synapse_client) + file_handle_id = await multipart_upload_file_async( + syn=client, + file_path=path, + content_type="text/csv", + ) + + import_request = GridCsvImportRequest( + session_id=self.session_id, + file_handle_id=file_handle_id, + ) + await import_request.send_job_and_wait_async( + timeout=timeout, synapse_client=synapse_client + ) + + return self diff --git a/synapseclient/models/mixins/asynchronous_job.py b/synapseclient/models/mixins/asynchronous_job.py index fd3649bc1..4f63ff157 100644 --- a/synapseclient/models/mixins/asynchronous_job.py +++ b/synapseclient/models/mixins/asynchronous_job.py @@ -15,6 +15,7 @@ CREATE_GRID_REQUEST, CREATE_SCHEMA_REQUEST, GET_VALIDATION_SCHEMA_REQUEST, + GRID_CSV_IMPORT_REQUEST, GRID_RECORD_SET_EXPORT_REQUEST, QUERY_BUNDLE_REQUEST, QUERY_TABLE_CSV_REQUEST, @@ -29,6 +30,7 @@ ASYNC_JOB_URIS = { AGENT_CHAT_REQUEST: "/agent/chat/async", CREATE_GRID_REQUEST: "/grid/session/async", + GRID_CSV_IMPORT_REQUEST: "/grid/import/csv/async", GRID_RECORD_SET_EXPORT_REQUEST: "/grid/export/recordset/async", TABLE_UPDATE_TRANSACTION_REQUEST: "/entity/{entityId}/table/transaction/async", GET_VALIDATION_SCHEMA_REQUEST: "/schema/type/validation/async", diff --git a/tests/integration/synapseclient/models/async/test_grid_async.py b/tests/integration/synapseclient/models/async/test_grid_async.py index cd16a0cf0..7ffe626dd 100644 --- a/tests/integration/synapseclient/models/async/test_grid_async.py +++ b/tests/integration/synapseclient/models/async/test_grid_async.py @@ -183,3 +183,90 @@ async def test_delete_grid_session_validation_error_async(self) -> None: match="session_id is required to delete a GridSession", ): await grid.delete_async(synapse_client=self.syn) + + async def test_import_csv_with_file_handle_id_async( + self, record_set_fixture: RecordSet + ) -> None: + # GIVEN: A grid session and a file handle from an existing record set + grid = Grid(record_set_id=record_set_fixture.id) + created_grid = await grid.create_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=self.syn + ) + self.schedule_for_cleanup( + lambda: self.syn.restDELETE(f"/grid/session/{created_grid.session_id}") + ) + + # WHEN: Importing a CSV using the file handle ID from the record set + result = await created_grid.import_csv_async( + file_handle_id=record_set_fixture.data_file_handle_id, + timeout=ASYNC_JOB_TIMEOUT_SEC, + synapse_client=self.syn, + ) + + # THEN: The import should succeed and return the same Grid instance + assert result is created_grid + assert result.session_id == created_grid.session_id + + async def test_import_csv_with_path_async( + self, record_set_fixture: RecordSet + ) -> None: + # GIVEN: A grid session and a local CSV file + grid = Grid(record_set_id=record_set_fixture.id) + created_grid = await grid.create_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=self.syn + ) + self.schedule_for_cleanup( + lambda: self.syn.restDELETE(f"/grid/session/{created_grid.session_id}") + ) + + # Create a temporary CSV file with the same schema as the record set + test_data = pd.DataFrame( + { + "id": [6, 7], + "name": ["Zeta", "Eta"], + "value": [60.1, 70.2], + "category": ["A", "B"], + "active": [True, False], + } + ) + temp_fd, filename = tempfile.mkstemp(suffix=".csv") + try: + os.close(temp_fd) + test_data.to_csv(filename, index=False) + self.schedule_for_cleanup(filename) + + # WHEN: Importing the CSV from a local path + result = await created_grid.import_csv_async( + path=filename, + timeout=ASYNC_JOB_TIMEOUT_SEC, + synapse_client=self.syn, + ) + + # THEN: The import should succeed and return the same Grid instance + assert result is created_grid + assert result.session_id == created_grid.session_id + except Exception: + if os.path.exists(filename): + os.unlink(filename) + raise + + async def test_import_csv_validation_errors_async(self) -> None: + # GIVEN: A Grid instance with no session_id + grid = Grid() + + # WHEN/THEN: Calling import_csv_async without session_id raises ValueError + with pytest.raises( + ValueError, + match="session_id is required", + ): + await grid.import_csv_async(file_handle_id="123", synapse_client=self.syn) + + # GIVEN: A Grid with session_id but no file source + grid_with_session = Grid(session_id="some-session-id") + + # WHEN/THEN: Calling without file_handle_id or path raises ValueError + with pytest.raises( + ValueError, + match="Either file_handle_id or path", + ): + await grid_with_session.import_csv_async(synapse_client=self.syn) diff --git a/tests/unit/synapseclient/models/async/unit_test_curation_async.py b/tests/unit/synapseclient/models/async/unit_test_curation_async.py index 53649445b..7b6d2e9f8 100644 --- a/tests/unit/synapseclient/models/async/unit_test_curation_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_curation_async.py @@ -7,6 +7,7 @@ from synapseclient import Synapse from synapseclient.core.constants.concrete_types import ( FILE_BASED_METADATA_TASK_PROPERTIES, + GRID_CSV_IMPORT_REQUEST, RECORD_BASED_METADATA_TASK_PROPERTIES, ) from synapseclient.models.curation import ( @@ -14,6 +15,7 @@ CurationTask, FileBasedMetadataTaskProperties, Grid, + GridCsvImportRequest, GridRecordSetExportRequest, RecordBasedMetadataTaskProperties, _create_task_properties_from_dict, @@ -905,3 +907,156 @@ def test_to_synapse_request(self) -> None: # THEN it should contain the correct fields assert "concreteType" in result assert result["sessionId"] == SESSION_ID + + +FILE_HANDLE_ID = "987654321" +CSV_PATH = "/tmp/test_data.csv" + + +class TestGridCsvImportRequest: + """Tests for the GridCsvImportRequest helper dataclass.""" + + def test_fill_from_dict(self) -> None: + # GIVEN a response with session data + response = {"sessionId": SESSION_ID} + + # WHEN I fill a GridCsvImportRequest from the response + import_req = GridCsvImportRequest( + session_id=SESSION_ID, file_handle_id=FILE_HANDLE_ID + ) + import_req.fill_from_dict(response) + + # THEN the session_id should be populated + assert import_req.session_id == SESSION_ID + + def test_to_synapse_request(self) -> None: + # GIVEN a GridCsvImportRequest with all fields set + import_req = GridCsvImportRequest( + session_id=SESSION_ID, file_handle_id=FILE_HANDLE_ID + ) + + # WHEN I convert it to a synapse request + result = import_req.to_synapse_request() + + # THEN it should contain the correct fields + assert result["concreteType"] == GRID_CSV_IMPORT_REQUEST + assert result["sessionId"] == SESSION_ID + assert result["fileHandleId"] == FILE_HANDLE_ID + + def test_to_synapse_request_none_values_excluded(self) -> None: + # GIVEN a GridCsvImportRequest with no optional values + import_req = GridCsvImportRequest() + + # WHEN I convert it to a synapse request + result = import_req.to_synapse_request() + + # THEN only concreteType should be present + assert result == {"concreteType": GRID_CSV_IMPORT_REQUEST} + assert "sessionId" not in result + assert "fileHandleId" not in result + + +class TestGridImportCsvAsync: + """Tests for the Grid.import_csv_async method.""" + + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_import_csv_raises_without_session_id(self) -> None: + # GIVEN a Grid without session_id + grid = Grid() + + # WHEN I call import_csv_async without a session_id + # THEN it should raise a ValueError + with pytest.raises(ValueError, match="session_id is required"): + await grid.import_csv_async( + file_handle_id=FILE_HANDLE_ID, synapse_client=self.syn + ) + + async def test_import_csv_raises_without_file_handle_or_path(self) -> None: + # GIVEN a Grid with session_id but no file_handle_id or path + grid = Grid(session_id=SESSION_ID) + + # WHEN I call import_csv_async without file_handle_id or path + # THEN it should raise a ValueError + with pytest.raises(ValueError, match="Either file_handle_id or path"): + await grid.import_csv_async(synapse_client=self.syn) + + async def test_import_csv_raises_with_both_file_handle_and_path(self) -> None: + # GIVEN a Grid with session_id + grid = Grid(session_id=SESSION_ID) + + # WHEN I call import_csv_async with both file_handle_id and path + # THEN it should raise a ValueError + with pytest.raises(ValueError, match="Only one of file_handle_id or path"): + await grid.import_csv_async( + file_handle_id=FILE_HANDLE_ID, + path=CSV_PATH, + synapse_client=self.syn, + ) + + async def test_import_csv_with_file_handle_id(self) -> None: + # GIVEN a Grid with session_id and a file_handle_id + grid = Grid(session_id=SESSION_ID) + + mock_import_request = AsyncMock() + mock_import_request.return_value = GridCsvImportRequest( + session_id=SESSION_ID, file_handle_id=FILE_HANDLE_ID + ) + + with patch( + "synapseclient.models.curation.GridCsvImportRequest.send_job_and_wait_async", + new_callable=AsyncMock, + ) as mock_send: + # WHEN I call import_csv_async with a file_handle_id + result = await grid.import_csv_async( + file_handle_id=FILE_HANDLE_ID, synapse_client=self.syn + ) + + # THEN the import request should be sent with the correct parameters + mock_send.assert_called_once_with(timeout=120, synapse_client=self.syn) + # AND the method should return the same grid instance + assert result is grid + + async def test_import_csv_with_path_uploads_file(self) -> None: + # GIVEN a Grid with session_id and a local path + grid = Grid(session_id=SESSION_ID) + + with patch( + "synapseclient.models.curation.multipart_upload_file_async", + new_callable=AsyncMock, + return_value=FILE_HANDLE_ID, + ) as mock_upload, patch( + "synapseclient.models.curation.GridCsvImportRequest.send_job_and_wait_async", + new_callable=AsyncMock, + ) as mock_send: + # WHEN I call import_csv_async with a local path + result = await grid.import_csv_async(path=CSV_PATH, synapse_client=self.syn) + + # THEN the file should be uploaded + mock_upload.assert_called_once_with( + syn=self.syn, file_path=CSV_PATH, content_type="text/csv" + ) + # AND the import request should be sent + mock_send.assert_called_once_with(timeout=120, synapse_client=self.syn) + # AND the method should return the same grid instance + assert result is grid + + async def test_import_csv_with_custom_timeout(self) -> None: + # GIVEN a Grid with session_id + grid = Grid(session_id=SESSION_ID) + + with patch( + "synapseclient.models.curation.GridCsvImportRequest.send_job_and_wait_async", + new_callable=AsyncMock, + ) as mock_send: + # WHEN I call import_csv_async with a custom timeout + await grid.import_csv_async( + file_handle_id=FILE_HANDLE_ID, + timeout=300, + synapse_client=self.syn, + ) + + # THEN the custom timeout should be passed to the request + mock_send.assert_called_once_with(timeout=300, synapse_client=self.syn)