diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py
index 3ffefd0f..57319762 100644
--- a/cycode/cli/apps/scan/code_scanner.py
+++ b/cycode/cli/apps/scan/code_scanner.py
@@ -29,6 +29,7 @@
     generate_unique_scan_id,
     is_cycodeignore_allowed_by_scan_config,
     set_issue_detected_by_scan_results,
+    should_use_presigned_upload,
 )
 from cycode.cyclient.models import ZippedFileScanResult
 from cycode.logger import get_logger
@@ -106,7 +107,10 @@ def _should_use_sync_flow(command_scan_type: str, scan_type: str, sync_option: b
 
 
 def _get_scan_documents_thread_func(
-    ctx: typer.Context, is_git_diff: bool, is_commit_range: bool, scan_parameters: dict
+    ctx: typer.Context,
+    is_git_diff: bool,
+    is_commit_range: bool,
+    scan_parameters: dict,
 ) -> Callable[[list[Document]], tuple[str, CliError, LocalScanResult]]:
     cycode_client = ctx.obj['client']
     scan_type = ctx.obj['scan_type']
@@ -203,9 +207,30 @@ def scan_documents(
         return
 
     scan_batch_thread_func = _get_scan_documents_thread_func(ctx, is_git_diff, is_commit_range, scan_parameters)
-    errors, local_scan_results = run_parallel_batched_scan(
-        scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
-    )
+
+    # Presigned uploads accept one large ZIP, so batching is skipped whenever everything fits
+    skip_batching = False
+    if should_use_presigned_upload(scan_type):
+        try:
+            # Size probe only: zip all documents as a single batch and discard the archive;
+            # ZipTooLargeError is raised if it exceeds the scan type's limit
+            zip_documents(scan_type, documents_to_scan)
+        except custom_exceptions.ZipTooLargeError:
+            printer.print_warning(
+                'The repository is too large to upload as a single file. '
+                'Falling back to batched scanning. This may result in multiple scan results.'
+            )
+        else:
+            # It fits: skip batching and upload everything as one ZIP
+            skip_batching = True
+
+    errors, local_scan_results = run_parallel_batched_scan(
+        scan_batch_thread_func,
+        scan_type,
+        documents_to_scan,
+        progress_bar=progress_bar,
+        skip_batching=skip_batching,
+    )
 
     try_set_aggregation_report_url_if_needed(ctx, scan_parameters, ctx.obj['client'], scan_type)
 
@@ -217,6 +242,33 @@ def scan_documents(
     print_local_scan_results(ctx, local_scan_results, errors)
 
 
+def _perform_scan_v4_async(
+    cycode_client: 'ScanClient',
+    zipped_documents: 'InMemoryZip',
+    scan_type: str,
+    scan_parameters: dict,
+    is_git_diff: bool,
+    is_commit_range: bool,
+) -> ZippedFileScanResult:
+    """Scan a ZIP through the V4 API: upload it via a presigned link, then scan by upload ID.
+
+    Requests an upload link, posts the zipped documents to it, triggers the scan
+    for the returned upload ID and polls for the scan results.
+    """
+    upload_link = cycode_client.get_upload_link(scan_type)
+    logger.debug('Got upload link, %s', {'upload_id': upload_link.upload_id})
+
+    cycode_client.upload_to_presigned_post(upload_link.url, upload_link.presigned_post_fields, zipped_documents)
+    logger.debug('Uploaded zip to presigned URL')
+
+    scan_async_result = cycode_client.scan_repository_from_upload_id(
+        scan_type, upload_link.upload_id, scan_parameters, is_git_diff, is_commit_range
+    )
+    logger.debug('V4 scan request triggered, %s', {'scan_id': scan_async_result.scan_id})
+
+    return poll_scan_results(cycode_client, scan_async_result.scan_id, scan_type, scan_parameters)
+
+
 def _perform_scan_async(
     cycode_client: 'ScanClient',
     zipped_documents: 'InMemoryZip',
@@ -262,6 +314,11 @@ def _perform_scan(
         # it does not support commit range scans; should_use_sync_flow handles it
         return _perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff)
 
+    if should_use_presigned_upload(scan_type):
+        return _perform_scan_v4_async(
+            cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff, is_commit_range
+        )
+
     return _perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters, is_commit_range)
 
 
diff --git a/cycode/cli/apps/scan/commit_range_scanner.py
b/cycode/cli/apps/scan/commit_range_scanner.py
index 85497d5f..8672d2f8 100644
--- a/cycode/cli/apps/scan/commit_range_scanner.py
+++ b/cycode/cli/apps/scan/commit_range_scanner.py
@@ -86,6 +86,43 @@ def _perform_commit_range_scan_async(
     return poll_scan_results(cycode_client, scan_async_result.scan_id, scan_type, scan_parameters, timeout)
 
 
+def _perform_commit_range_scan_v4_async(
+    cycode_client: 'ScanClient',
+    from_commit_zipped_documents: 'InMemoryZip',
+    to_commit_zipped_documents: 'InMemoryZip',
+    scan_type: str,
+    scan_parameters: dict,
+    timeout: Optional[int] = None,
+) -> ZippedFileScanResult:
+    """Run an async commit range scan through the V4 API using presigned uploads.
+
+    Uploads the from-commit and to-commit ZIPs to separate presigned links, triggers
+    the scan for the two upload IDs and polls for the scan results.
+    """
+    from_upload_link = cycode_client.get_upload_link(scan_type)
+    logger.debug('Got from-commit upload link, %s', {'upload_id': from_upload_link.upload_id})
+
+    cycode_client.upload_to_presigned_post(
+        from_upload_link.url, from_upload_link.presigned_post_fields, from_commit_zipped_documents
+    )
+    logger.debug('Uploaded from-commit zip')
+
+    to_upload_link = cycode_client.get_upload_link(scan_type)
+    logger.debug('Got to-commit upload link, %s', {'upload_id': to_upload_link.upload_id})
+
+    cycode_client.upload_to_presigned_post(
+        to_upload_link.url, to_upload_link.presigned_post_fields, to_commit_zipped_documents
+    )
+    logger.debug('Uploaded to-commit zip')
+
+    scan_async_result = cycode_client.commit_range_scan_from_upload_ids(
+        scan_type, from_upload_link.upload_id, to_upload_link.upload_id, scan_parameters
+    )
+    logger.debug('V4 commit range scan request triggered, %s', {'scan_id': scan_async_result.scan_id})
+
+    return poll_scan_results(cycode_client, scan_async_result.scan_id, scan_type, scan_parameters, timeout)
+
+
 def _scan_commit_range_documents(
     ctx: typer.Context,
     from_documents_to_scan: list[Document],
@@ -118,14 +155,20 @@ def _scan_commit_range_documents(
             # for SAST it is files with diff between from_commit and to_commit
             to_commit_zipped_documents = zip_documents(scan_type, to_documents_to_scan)
 
-            scan_result = _perform_commit_range_scan_async(
-                cycode_client,
-                from_commit_zipped_documents,
-                to_commit_zipped_documents,
-                scan_type,
-                scan_parameters,
-                timeout,
-            )
+            # scan types with presigned upload support go through the V4 flow; both flows share one signature
+            perform_commit_range_scan = (
+                _perform_commit_range_scan_v4_async
+                if scan_type in consts.PRESIGNED_UPLOAD_SCAN_TYPES
+                else _perform_commit_range_scan_async
+            )
+            scan_result = perform_commit_range_scan(
+                cycode_client,
+                from_commit_zipped_documents,
+                to_commit_zipped_documents,
+                scan_type,
+                scan_parameters,
+                timeout,
+            )
             enrich_scan_result_with_data_from_detection_rules(cycode_client, scan_result)
 
         progress_bar.update(ScanProgressBarSection.SCAN)
diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py
index 8f051edd..1a4e31bf 100644
--- a/cycode/cli/consts.py
+++ b/cycode/cli/consts.py
@@ -192,15 +192,19 @@
 # 5MB in bytes (in decimal)
 FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000
 
+PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 5 * 1024 * 1024 * 1024  # 5 GB (S3 presigned POST limit)
+# scan types whose ZIP is uploaded through a presigned link
+PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE}
+
 DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024
 ZIP_MAX_SIZE_LIMIT_IN_BYTES = {
     SCA_SCAN_TYPE: 200 * 1024 * 1024,
-    SAST_SCAN_TYPE: 50 * 1024 * 1024,
+    SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES,
 }
 
 # scan in batches
 DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024
-SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: 50 * 1024 * 1024}
+SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES}
 SCAN_BATCH_MAX_SIZE_IN_BYTES_ENV_VAR_NAME = 'SCAN_BATCH_MAX_SIZE_IN_BYTES'
 
 DEFAULT_SCAN_BATCH_MAX_FILES_COUNT = 1000
diff --git a/cycode/cli/files_collector/zip_documents.py
b/cycode/cli/files_collector/zip_documents.py
index 6f5edd81..7927bdc6 100644
--- a/cycode/cli/files_collector/zip_documents.py
+++ b/cycode/cli/files_collector/zip_documents.py
@@ -17,7 +17,11 @@ def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None:
         raise custom_exceptions.ZipTooLargeError(max_size_limit)
 
 
-def zip_documents(scan_type: str, documents: list[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip:
+def zip_documents(
+    scan_type: str,
+    documents: list[Document],
+    zip_file: Optional[InMemoryZip] = None,
+) -> InMemoryZip:
     if zip_file is None:
         zip_file = InMemoryZip()
 
diff --git a/cycode/cli/utils/scan_batch.py b/cycode/cli/utils/scan_batch.py
index 8bfd7ed0..97e58bc7 100644
--- a/cycode/cli/utils/scan_batch.py
+++ b/cycode/cli/utils/scan_batch.py
@@ -111,9 +111,14 @@ def run_parallel_batched_scan(
     scan_type: str,
     documents: list[Document],
     progress_bar: 'BaseProgressBar',
+    skip_batching: bool = False,
 ) -> tuple[dict[str, 'CliError'], list['LocalScanResult']]:
     # batching is disabled for SCA; requested by Mor
-    batches = [documents] if scan_type == consts.SCA_SCAN_TYPE else split_documents_into_batches(scan_type, documents)
+    # skip_batching lets the caller send all documents as one batch for any scan type
+    if scan_type == consts.SCA_SCAN_TYPE or skip_batching:
+        batches = [documents]
+    else:
+        batches = split_documents_into_batches(scan_type, documents)
 
     progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches))  # * 3
     # TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps:
diff --git a/cycode/cli/utils/scan_utils.py b/cycode/cli/utils/scan_utils.py
index be86716b..819a4116 100644
--- a/cycode/cli/utils/scan_utils.py
+++ b/cycode/cli/utils/scan_utils.py
@@ -5,6 +5,7 @@
 
 import typer
 
+from cycode.cli import consts
 from cycode.cli.cli_types import SeverityOption
 
 if TYPE_CHECKING:
@@ -31,6 +32,11 @@ def is_cycodeignore_allowed_by_scan_config(ctx: typer.Context) -> bool:
     return scan_config.is_cycode_ignore_allowed if scan_config else True
 
 
+def should_use_presigned_upload(scan_type: str) -> bool:
+    """Return True when the scan type is listed in consts.PRESIGNED_UPLOAD_SCAN_TYPES."""
+    return scan_type in consts.PRESIGNED_UPLOAD_SCAN_TYPES
+
+
 def generate_unique_scan_id() -> UUID:
     if 'PYTEST_TEST_UNIQUE_ID' in os.environ:
         return UUID(os.environ['PYTEST_TEST_UNIQUE_ID'])
diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py
index c3144a53..904fe0ef 100644
--- a/cycode/cyclient/models.py
+++ b/cycode/cyclient/models.py
@@ -114,6 +114,28 @@ def build_dto(self, data: dict[str, Any], **_) -> 'ScanResult':
         return ScanResult(**data)
 
 
+@dataclass
+class UploadLinkResponse:
+    """Upload ID, target URL and form fields describing one presigned POST upload."""
+    upload_id: str
+    url: str
+    presigned_post_fields: dict[str, str]
+
+
+class UploadLinkResponseSchema(Schema):
+    """Schema that loads an upload-link payload into an UploadLinkResponse; unknown keys are excluded."""
+    class Meta:
+        unknown = EXCLUDE
+
+    upload_id = fields.String()
+    url = fields.String()
+    presigned_post_fields = fields.Dict(keys=fields.String(), values=fields.String())
+
+    @post_load
+    def build_dto(self, data: dict[str, Any], **_) -> 'UploadLinkResponse':
+        return UploadLinkResponse(**data)
+
+
 class ScanInitializationResponse(Schema):
     def __init__(self, scan_id: Optional[str] = None, err: Optional[str] = None) -> None:
         super().__init__()
diff --git a/cycode/cyclient/scan_client.py b/cycode/cyclient/scan_client.py
index 4f2debca..bf7e75b1 100644
--- a/cycode/cyclient/scan_client.py
+++ b/cycode/cyclient/scan_client.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Optional, Union
 from uuid import UUID
 
+import requests
 from requests import Response
 
 from cycode.cli import consts
@@ -25,6 +26,7 @@ def __init__(
         self.scan_config = scan_config
 
         self._SCAN_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/cli-scan'
+        self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH = 'api/v4/scans/cli'
         self._DETECTIONS_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/detections/cli'
         self._POLICIES_SERVICE_CONTROLLER_PATH_V3 = 'api/v3/policies'
 
@@ -56,6 +58,11 @@ def get_scan_aggregation_report_url(self, aggregation_id: str, scan_type: str) -
         )
         return models.ScanReportUrlResponseSchema().build_dto(response.json())
 
+    def get_scan_service_v4_url_path(self, scan_type: str) -> str:
+        """Return the V4 CLI controller path prefixed with the service name for this scan type."""
+        service_path = self.scan_config.get_service_name(scan_type)
+        return f'{service_path}/{self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH}'
+
     def get_zipped_file_scan_async_url_path(self, scan_type: str, should_use_sync_flow: bool = False) -> str:
         async_scan_type = self.scan_config.get_async_scan_type(scan_type)
         async_entity_type = self.scan_config.get_async_entity_type(scan_type)
@@ -123,6 +130,49 @@ def zipped_file_scan_async(
         )
         return models.ScanInitializationResponseSchema().load(response.json())
 
+    def get_upload_link(self, scan_type: str) -> models.UploadLinkResponse:
+        """Request an upload link for the given scan type from the V4 upload-link endpoint."""
+        async_scan_type = self.scan_config.get_async_scan_type(scan_type)
+        url_path = f'{self.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/upload-link'
+        response = self.scan_cycode_client.get(url_path=url_path, hide_response_content_log=self._hide_response_log)
+        return models.UploadLinkResponseSchema().load(response.json())
+
+    def upload_to_presigned_post(self, url: str, fields: dict[str, str], zip_file: 'InMemoryZip') -> None:
+        """Upload the ZIP to a presigned POST URL as multipart form data.
+
+        Raises:
+            requests.HTTPError: if the upload endpoint responds with an error status.
+        """
+        # (None, value) makes requests send each entry as a plain form field without a filename
+        multipart = {key: (None, value) for key, value in fields.items()}
+        # keep the file part last: S3-style presigned POST uploads expect it after all other form fields
+        multipart['file'] = (None, zip_file.read())
+        # the presigned URL is called directly with requests, not through the Cycode API client
+        response = requests.post(url, files=multipart, timeout=self.scan_cycode_client.timeout)
+        response.raise_for_status()
+
+    def scan_repository_from_upload_id(
+        self,
+        scan_type: str,
+        upload_id: str,
+        scan_parameters: dict,
+        is_git_diff: bool = False,
+        is_commit_range: bool = False,
+    ) -> models.ScanInitializationResponse:
+        """Trigger a V4 repository scan for an archive that was already uploaded, identified by its upload ID."""
+        async_scan_type = self.scan_config.get_async_scan_type(scan_type)
+        url_path = f'{self.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/repository'
+        response = self.scan_cycode_client.post(
+            url_path=url_path,
+            body={
+                'upload_id': upload_id,
+                'is_git_diff': is_git_diff,
+                'is_commit_range': is_commit_range,
+                'scan_parameters': json.dumps(scan_parameters),
+            },
+        )
+        return models.ScanInitializationResponseSchema().load(response.json())
+
     def commit_range_scan_async(
         self,
         from_commit_zip_file: InMemoryZip,
@@ -161,6 +211,28 @@ def commit_range_scan_async(
         )
         return models.ScanInitializationResponseSchema().load(response.json())
 
+    def commit_range_scan_from_upload_ids(
+        self,
+        scan_type: str,
+        from_upload_id: str,
+        to_upload_id: str,
+        scan_parameters: dict,
+        is_git_diff: bool = False,
+    ) -> models.ScanInitializationResponse:
+        """Trigger a V4 commit range scan for two previously uploaded archives (from-commit and to-commit)."""
+        async_scan_type = self.scan_config.get_async_scan_type(scan_type)
+        url_path = f'{self.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/repository/commit-range'
+        response = self.scan_cycode_client.post(
+            url_path=url_path,
+            body={
+                'from_upload_id': from_upload_id,
+                'to_upload_id': to_upload_id,
+                'is_git_diff': is_git_diff,
+                'scan_parameters': json.dumps(scan_parameters),
+            },
+        )
+        return models.ScanInitializationResponseSchema().load(response.json())
+
     def get_scan_details_path(self, scan_type: str, scan_id: str) -> str:
         return f'{self.get_scan_service_url_path(scan_type)}/{scan_id}'
 