diff --git a/README.md b/README.md index 5295e95..89ff153 100644 --- a/README.md +++ b/README.md @@ -41,15 +41,60 @@ Pre-configured workflow examples are available in the [`workflows/`](workflows/) These examples are production-ready and include best practices for each platform. +## Monorepo Workspace Support + +The Socket CLI supports scanning specific workspaces within monorepo structures while preserving git context from the repository root. This is useful for organizations that maintain multiple applications or services in a single repository. + +### Key Features + +- **Multiple Sub-paths**: Specify multiple `--sub-path` options to scan different directories within your monorepo +- **Combined Workspace**: All sub-paths are scanned together as a single workspace in Socket +- **Git Context Preserved**: Repository metadata (commits, branches, etc.) comes from the main target-path +- **Workspace Naming**: Use `--workspace-name` to differentiate scans from different parts of your monorepo + +### Usage Examples + +**Scan multiple frontend and backend workspaces:** +```bash +socketcli --target-path /path/to/monorepo \ + --sub-path frontend \ + --sub-path backend \ + --sub-path services/api \ + --workspace-name main-app +``` + +**GitHub Actions for monorepo workspace:** +```bash +socketcli --target-path $GITHUB_WORKSPACE \ + --sub-path packages/web \ + --sub-path packages/mobile \ + --workspace-name mobile-web \ + --scm github \ + --pr-number $PR_NUMBER +``` + +This will: +- Scan manifest files in `./packages/web/` and `./packages/mobile/` +- Combine them into a single workspace scan +- Create a repository in Socket named like `my-repo-mobile-web` +- Preserve git context (commits, branch info) from the repository root + +### Requirements + +- Both `--sub-path` and `--workspace-name` must be specified together +- `--sub-path` can be used multiple times to include multiple directories +- All specified sub-paths must exist within the target-path + ## Usage ```` shell 
-socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--integration {api,github,gitlab}] [--owner OWNER] [--branch BRANCH] - [--committers [COMMITTERS ...]] [--pr-number PR_NUMBER] [--commit-message COMMIT_MESSAGE] [--commit-sha COMMIT_SHA] - [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--save-submitted-files-list SAVE_SUBMITTED_FILES_LIST] - [--default-branch] [--pending-head] [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] - [--disable-overview] [--disable-security-issue] [--allow-unverified] [--ignore-commit-files] [--disable-blocking] - [--scm SCM] [--timeout TIMEOUT] [--exclude-license-details] +socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--repo-is-public] [--branch BRANCH] [--integration {api,github,gitlab,azure,bitbucket}] + [--owner OWNER] [--pr-number PR_NUMBER] [--commit-message COMMIT_MESSAGE] [--commit-sha COMMIT_SHA] [--committers [COMMITTERS ...]] + [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--license-file-name LICENSE_FILE_NAME] [--save-submitted-files-list SAVE_SUBMITTED_FILES_LIST] + [--save-manifest-tar SAVE_MANIFEST_TAR] [--files FILES] [--sub-path SUB_PATH] [--workspace-name WORKSPACE_NAME] + [--excluded-ecosystems EXCLUDED_ECOSYSTEMS] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] + [--enable-json] [--enable-sarif] [--disable-overview] [--exclude-license-details] [--allow-unverified] [--disable-security-issue] + [--ignore-commit-files] [--disable-blocking] [--enable-diff] [--scm SCM] [--timeout TIMEOUT] [--include-module-folders] [--version] ```` If you don't want to provide the Socket API Token every time then you can use the environment variable `SOCKET_SECURITY_API_KEY` @@ -65,11 +110,11 @@ If you don't want to provide the Socket API Token every time then you can use th | Parameter | Required | Default | Description | |:-----------------|:---------|:--------|:------------------------------------------------------------------------| | --repo | False 
| *auto* | Repository name in owner/repo format (auto-detected from git remote) | -| --integration | False | api | Integration type (api, github, gitlab) | +| --repo-is-public | False | False | If set, flags a new repository creation as public. Defaults to false. | +| --integration | False | api | Integration type (api, github, gitlab, azure, bitbucket) | | --owner | False | | Name of the integration owner, defaults to the socket organization slug | | --branch | False | *auto* | Branch name (auto-detected from git) | | --committers | False | *auto* | Committer(s) to filter by (auto-detected from git commit) | -| --repo-is-public | False | False | If set, flags a new repository creation as public. Defaults to false. | #### Pull Request and Commit | Parameter | Required | Default | Description | @@ -83,17 +128,20 @@ If you don't want to provide the Socket API Token every time then you can use th |:----------------------------|:---------|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | --target-path | False | ./ | Target path for analysis | | --sbom-file | False | | SBOM file path | -| --files | False | *auto* | Files to analyze (JSON array string). Auto-detected from git commit changes when not specified | -| --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). 
You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) | | --license-file-name | False | `license_output.json` | Name of the file to save the license details to if enabled | | --save-submitted-files-list | False | | Save list of submitted file names to JSON file for debugging purposes | | --save-manifest-tar | False | | Save all manifest files to a compressed tar.gz archive with original directory structure | +| --files | False | *auto* | Files to analyze (JSON array string). Auto-detected from git commit changes when not specified | +| --sub-path | False | | Sub-path within target-path for manifest file scanning (can be specified multiple times). All sub-paths are combined into a single workspace scan while preserving git context from target-path. Must be used with --workspace-name | +| --workspace-name | False | | Workspace name suffix to append to repository name (repo-name-workspace_name). Must be used with --sub-path | +| --excluded-ecosystems | False | [] | List of ecosystems to exclude from analysis (JSON array string). 
You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) | #### Branch and Scan Configuration -| Parameter | Required | Default | Description | -|:-----------------|:---------|:--------|:------------------------------------------------------------------------------------------------------| -| --default-branch | False | *auto* | Make this branch the default branch (auto-detected from git and CI environment when not specified) | -| --pending-head | False | *auto* | If true, the new scan will be set as the branch's head scan (automatically synced with default-branch) | +| Parameter | Required | Default | Description | +|:-------------------------|:---------|:--------|:------------------------------------------------------------------------------------------------------| +| --default-branch | False | *auto* | Make this branch the default branch (auto-detected from git and CI environment when not specified) | +| --pending-head | False | *auto* | If true, the new scan will be set as the branch's head scan (automatically synced with default-branch) | +| --include-module-folders | False | False | If enabled, includes manifest files from module folders such as `node_modules` | #### Output Configuration | Parameter | Required | Default | Description | @@ -104,6 +152,7 @@ If you don't want to provide the Socket API Token every time then you can use th | --enable-sarif | False | False | Enable SARIF output of results instead of table or JSON format | | --disable-overview | False | False | Disable overview output | | --exclude-license-details | False | False | Exclude license details from the diff report (boosts performance for large repos) | +| --version | False | False | Show program's version number and exit | #### Security Configuration | Parameter | Required | Default | Description | @@ -119,7 +168,6 @@ If you don't want to provide the Socket API Token every time then you can use th | --enable-diff | False | False | Enable diff 
mode even when using --integration api (forces diff mode without SCM integration) | | --scm | False | api | Source control management type | | --timeout | False | | Timeout in seconds for API requests | -| --include-module-folders | False | False | If enabled will include manifest files from folders like node_modules | #### Plugins diff --git a/pyproject.toml b/pyproject.toml index d8747c3..22811e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "socketsecurity" -version = "2.2.9" +version = "2.2.11" requires-python = ">= 3.10" license = {"file" = "LICENSE"} dependencies = [ @@ -16,7 +16,7 @@ dependencies = [ 'GitPython', 'packaging', 'python-dotenv', - 'socketdev>=3.0.5,<4.0.0' + 'socketdev>=3.0.6,<4.0.0' ] readme = "README.md" description = "Socket Security CLI for CI/CD" diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 2ba817a..9ea1adb 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.2.9' +__version__ = '2.2.11' diff --git a/socketsecurity/config.py b/socketsecurity/config.py index 72a2327..bbfb4bc 100644 --- a/socketsecurity/config.py +++ b/socketsecurity/config.py @@ -60,7 +60,7 @@ class CliConfig: license_file_name: str = "license_output.json" save_submitted_files_list: Optional[str] = None save_manifest_tar: Optional[str] = None - sub_path: Optional[str] = None + sub_paths: List[str] = field(default_factory=list) workspace_name: Optional[str] = None @classmethod @@ -108,7 +108,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig': 'license_file_name': args.license_file_name, 'save_submitted_files_list': args.save_submitted_files_list, 'save_manifest_tar': args.save_manifest_tar, - 'sub_path': args.sub_path, + 'sub_paths': args.sub_paths or [], 'workspace_name': args.workspace_name, 'version': __version__ } @@ -133,11 +133,11 @@ def from_args(cls, args_list: 
Optional[List[str]] = None) -> 'CliConfig': if args.owner: config_args['integration_org_slug'] = args.owner - # Validate that sub_path and workspace_name are used together - if args.sub_path and not args.workspace_name: + # Validate that sub_paths and workspace_name are used together + if args.sub_paths and not args.workspace_name: logging.error("--sub-path requires --workspace-name to be specified") exit(1) - if args.workspace_name and not args.sub_path: + if args.workspace_name and not args.sub_paths: logging.error("--workspace-name requires --sub-path to be specified") exit(1) @@ -299,9 +299,10 @@ def create_argument_parser() -> argparse.ArgumentParser: ) path_group.add_argument( "--sub-path", - dest="sub_path", + dest="sub_paths", metavar="", - help="Sub-path within target-path for manifest file scanning (while preserving git context from target-path)" + action="append", + help="Sub-path within target-path for manifest file scanning (can be specified multiple times). All sub-paths will be combined into a single workspace scan while preserving git context from target-path" ) path_group.add_argument( "--workspace-name", diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index 30275b9..6dd6ecb 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -451,14 +451,14 @@ def empty_head_scan_file() -> List[str]: log.debug(f"Created temporary empty file for baseline scan: {temp_path}") return [temp_path] - def create_full_scan(self, files: List[str], params: FullScanParams, base_path: str = None) -> FullScan: + def create_full_scan(self, files: List[str], params: FullScanParams, base_paths: List[str] = None) -> FullScan: """ Creates a new full scan via the Socket API. 
Args: files: List of file paths to scan params: Parameters for the full scan - base_path: Base path for the scan (optional) + base_paths: List of base paths for the scan (optional) Returns: FullScan object with scan results @@ -466,7 +466,7 @@ def create_full_scan(self, files: List[str], params: FullScanParams, base_path: log.info("Creating new full scan") create_full_start = time.time() - res = self.sdk.fullscans.post(files, params, use_types=True, use_lazy_loading=True, max_open_files=50, base_path=base_path) + res = self.sdk.fullscans.post(files, params, use_types=True, use_lazy_loading=True, max_open_files=50, base_paths=base_paths) if not res.success: log.error(f"Error creating full scan: {res.message}, status: {res.status}") raise Exception(f"Error creating full scan: {res.message}, status: {res.status}") @@ -480,20 +480,22 @@ def create_full_scan(self, files: List[str], params: FullScanParams, base_path: def create_full_scan_with_report_url( self, - path: str, + paths: List[str], params: FullScanParams, no_change: bool = False, save_files_list_path: str = None, - save_manifest_tar_path: str = None + save_manifest_tar_path: str = None, + base_paths: List[str] = None ) -> Diff: """Create a new full scan and return with html_report_url. 
Args: - path: Path to look for manifest files + paths: List of paths to look for manifest files params: Query params for the Full Scan endpoint no_change: If True, return empty result save_files_list_path: Optional path to save submitted files list for debugging save_manifest_tar_path: Optional path to save manifest files tar.gz archive + base_paths: List of base paths for the scan (optional) Returns: Dict with full scan data including html_report_url @@ -507,24 +509,27 @@ def create_full_scan_with_report_url( if no_change: return diff - # Find manifest files - files = self.find_files(path) + # Find manifest files from all paths + all_files = [] + for path in paths: + files = self.find_files(path) + all_files.extend(files) # Save submitted files list if requested - if save_files_list_path and files: - self.save_submitted_files_list(files, save_files_list_path) + if save_files_list_path and all_files: + self.save_submitted_files_list(all_files, save_files_list_path) - # Save manifest tar.gz if requested - if save_manifest_tar_path and files: - self.save_manifest_tar(files, save_manifest_tar_path, path) + # Save manifest tar.gz if requested (use first path as base) + if save_manifest_tar_path and all_files and paths: + self.save_manifest_tar(all_files, save_manifest_tar_path, paths[0]) - if not files: + if not all_files: return diff try: # Create new scan new_scan_start = time.time() - new_full_scan = self.create_full_scan(files, params, base_path=path) + new_full_scan = self.create_full_scan(all_files, params, base_paths=base_paths) new_scan_end = time.time() log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") except APIFailure as e: @@ -847,37 +852,42 @@ def get_added_and_removed_packages( def create_new_diff( self, - path: str, + paths: List[str], params: FullScanParams, no_change: bool = False, save_files_list_path: str = None, - save_manifest_tar_path: str = None + save_manifest_tar_path: str = None, + base_paths: List[str] = 
None ) -> Diff: """Create a new diff using the Socket SDK. Args: - path: Path to look for manifest files + paths: List of paths to look for manifest files params: Query params for the Full Scan endpoint no_change: If True, return empty diff save_files_list_path: Optional path to save submitted files list for debugging save_manifest_tar_path: Optional path to save manifest files tar.gz archive + base_paths: List of base paths for the scan (optional) """ log.debug(f"starting create_new_diff with no_change: {no_change}") if no_change: return Diff(id="NO_DIFF_RAN", diff_url="", report_url="") - # Find manifest files - files = self.find_files(path) + # Find manifest files from all paths + all_files = [] + for path in paths: + files = self.find_files(path) + all_files.extend(files) # Save submitted files list if requested - if save_files_list_path and files: - self.save_submitted_files_list(files, save_files_list_path) + if save_files_list_path and all_files: + self.save_submitted_files_list(all_files, save_files_list_path) - # Save manifest tar.gz if requested - if save_manifest_tar_path and files: - self.save_manifest_tar(files, save_manifest_tar_path, path) + # Save manifest tar.gz if requested (use first path as base) + if save_manifest_tar_path and all_files and paths: + self.save_manifest_tar(all_files, save_manifest_tar_path, paths[0]) - if not files: + if not all_files: return Diff(id="NO_DIFF_RAN", diff_url="", report_url="") try: @@ -900,7 +910,7 @@ def create_new_diff( # Create baseline scan with empty file empty_files = Core.empty_head_scan_file() try: - head_full_scan = self.create_full_scan(empty_files, tmp_params, base_path=path) + head_full_scan = self.create_full_scan(empty_files, tmp_params, base_paths=base_paths) head_full_scan_id = head_full_scan.id log.debug(f"Created empty baseline scan: {head_full_scan_id}") @@ -923,7 +933,7 @@ def create_new_diff( # Create new scan try: new_scan_start = time.time() - new_full_scan = self.create_full_scan(files, 
params, base_path=path) + new_full_scan = self.create_full_scan(all_files, params, base_paths=base_paths) new_scan_end = time.time() log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") except APIFailure as e: diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py index 15ae755..2813790 100644 --- a/socketsecurity/socketcli.py +++ b/socketsecurity/socketcli.py @@ -147,15 +147,22 @@ def main_code(): config.branch = "socket-default-branch" log.debug(f"Using default branch name: {config.branch}") - # Calculate the scan path - combine target_path with sub_path if provided - scan_path = config.target_path - if config.sub_path: + # Calculate the scan paths - combine target_path with sub_paths if provided + scan_paths = [] + base_paths = [config.target_path] # Always use target_path as the single base path + + if config.sub_paths: import os - scan_path = os.path.join(config.target_path, config.sub_path) - log.debug(f"Using sub-path for scanning: {scan_path}") - # Verify the scan path exists - if not os.path.exists(scan_path): - raise Exception(f"Sub-path does not exist: {scan_path}") + for sub_path in config.sub_paths: + full_scan_path = os.path.join(config.target_path, sub_path) + log.debug(f"Using sub-path for scanning: {full_scan_path}") + # Verify the scan path exists + if not os.path.exists(full_scan_path): + raise Exception(f"Sub-path does not exist: {full_scan_path}") + scan_paths.append(full_scan_path) + else: + # Use the target path as the single scan path + scan_paths = [config.target_path] # Modify repository name if workspace_name is provided if config.workspace_name and config.repo: @@ -201,19 +208,22 @@ def main_code(): # Check if we have supported manifest files has_supported_files = files_to_check and core.has_manifest_files(files_to_check) - # If using sub_path, we need to check if manifest files exist in the scan path - if config.sub_path and not files_explicitly_specified: - # Override file checking to 
look in the scan path instead + # If using sub_paths, we need to check if manifest files exist in the scan paths + if config.sub_paths and not files_explicitly_specified: + # Override file checking to look in the scan paths instead import os from pathlib import Path - # Get manifest files from the scan path + # Get manifest files from all scan paths try: - scan_files = core.find_files(scan_path) - has_supported_files = len(scan_files) > 0 - log.debug(f"Found {len(scan_files)} manifest files in scan path: {scan_path}") + all_scan_files = [] + for scan_path in scan_paths: + scan_files = core.find_files(scan_path) + all_scan_files.extend(scan_files) + has_supported_files = len(all_scan_files) > 0 + log.debug(f"Found {len(all_scan_files)} manifest files across {len(scan_paths)} scan paths") except Exception as e: - log.debug(f"Error finding files in scan path {scan_path}: {e}") + log.debug(f"Error finding files in scan paths: {e}") has_supported_files = False # Case 3: If no supported files or files are empty, force API mode (no PR comments) @@ -301,7 +311,7 @@ def main_code(): log.info("Push initiated flow") if scm.check_event_type() == "diff": log.info("Starting comment logic for PR/MR event") - diff = core.create_new_diff(scan_path, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar) + diff = core.create_new_diff(scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar, base_paths=base_paths) comments = scm.get_comments_for_pr() log.debug("Removing comment alerts") @@ -354,14 +364,14 @@ def main_code(): ) else: log.info("Starting non-PR/MR flow") - diff = core.create_new_diff(scan_path, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar) + diff = core.create_new_diff(scan_paths, params, 
no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar, base_paths=base_paths) output_handler.handle_output(diff) elif config.enable_diff and not force_api_mode: # New logic: --enable-diff forces diff mode even with --integration api (no SCM) log.info("Diff mode enabled without SCM integration") - diff = core.create_new_diff(scan_path, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar) + diff = core.create_new_diff(scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar, base_paths=base_paths) output_handler.handle_output(diff) elif config.enable_diff and force_api_mode: @@ -374,11 +384,12 @@ def main_code(): } log.debug(f"params={serializable_params}") diff = core.create_full_scan_with_report_url( - scan_path, + scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, - save_manifest_tar_path=config.save_manifest_tar + save_manifest_tar_path=config.save_manifest_tar, + base_paths=base_paths ) log.info(f"Full scan created with ID: {diff.id}") log.info(f"Full scan report URL: {diff.report_url}") @@ -393,21 +404,23 @@ def main_code(): } log.debug(f"params={serializable_params}") diff = core.create_full_scan_with_report_url( - scan_path, + scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, - save_manifest_tar_path=config.save_manifest_tar + save_manifest_tar_path=config.save_manifest_tar, + base_paths=base_paths ) log.info(f"Full scan created with ID: {diff.id}") log.info(f"Full scan report URL: {diff.report_url}") else: log.info("API Mode") diff = core.create_new_diff( - scan_path, params, + scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, - 
save_manifest_tar_path=config.save_manifest_tar + save_manifest_tar_path=config.save_manifest_tar, + base_paths=base_paths ) output_handler.handle_output(diff)