From 77c913bd617f8bc68015f423be0b853fc5105115 Mon Sep 17 00:00:00 2001 From: GENTILHOMME Thomas Date: Sun, 29 Mar 2026 22:51:53 +0200 Subject: [PATCH] docs: refactor workspace and scanner documentation --- .changeset/mighty-numbers-matter.md | 5 + README.md | 219 +++++----------------- package.json | 1 + workspaces/scanner/ARCHITECTURE.md | 175 +++++++++++++++++ workspaces/scanner/README.md | 176 ++++++++++++++++- workspaces/scanner/docs/from.md | 259 ++++++++++---------------- workspaces/scanner/docs/verify.md | 35 ++++ workspaces/scanner/docs/workingDir.md | 142 ++++++++++++++ 8 files changed, 671 insertions(+), 341 deletions(-) create mode 100644 .changeset/mighty-numbers-matter.md create mode 100644 workspaces/scanner/ARCHITECTURE.md create mode 100644 workspaces/scanner/docs/verify.md create mode 100644 workspaces/scanner/docs/workingDir.md diff --git a/.changeset/mighty-numbers-matter.md b/.changeset/mighty-numbers-matter.md new file mode 100644 index 00000000..6dcec416 --- /dev/null +++ b/.changeset/mighty-numbers-matter.md @@ -0,0 +1,5 @@ +--- +"@nodesecure/scanner": minor +--- + +Refactor workspace README and re-implement proper scanner docs diff --git a/README.md b/README.md index 1b5ac2d9..8450e711 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,28 @@ -# Nodesecure Scanner - -![version](https://img.shields.io/badge/dynamic/json.svg?style=for-the-badge&url=https://raw.githubusercontent.com/NodeSecure/scanner/master/workspaces/scanner/package.json&query=$.version&label=Version) -[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg?style=for-the-badge)](https://github.com/NodeSecure/scanner/graphs/commit-activity) -[![OpenSSF -Scorecard](https://api.securityscorecards.dev/projects/github.com/NodeSecure/scanner/badge?style=for-the-badge)](https://api.securityscorecards.dev/projects/github.com/NodeSecure/scanner) 
-[![mit](https://img.shields.io/github/license/NodeSecure/scanner.svg?style=for-the-badge)](https://github.com/NodeSecure/scanner/blob/master/LICENSE) -![build](https://img.shields.io/github/actions/workflow/status/NodeSecure/scanner/node.js.yml?style=for-the-badge) +

+ # Nodesecure Scanner +

+ +

+ + version + + + maintained + + + OpenSSF Scorecard + + + mit + + + build + +

⚡️ Run a static analysis of your module's dependencies. -## Requirements - -- [Node.js](https://nodejs.org/en/) version 22 or higher - -## Features +## 💡 Features Scanner builds on [JS-X-Ray](https://github.com/NodeSecure/js-x-ray) (SAST) and [Vulnera](https://github.com/NodeSecure/vulnera) (CVE detection), and adds additional detections such as: @@ -26,9 +35,7 @@ Scanner builds on [JS-X-Ray](https://github.com/NodeSecure/js-x-ray) (SAST) and - Highlights infrastructure components such as ip, hostname, email, url - Supports NPM and Yarn lockfiles -## Getting Started - -This package is available in the Node Package Repository and can be easily installed with [npm](https://docs.npmjs.com/getting-started/what-is-npm) or [yarn](https://yarnpkg.com). +## 💃 Getting Started ```bash $ npm i @nodesecure/scanner @@ -36,171 +43,37 @@ $ npm i @nodesecure/scanner $ yarn add @nodesecure/scanner ``` -## Usage example - -```js -import * as scanner from "@nodesecure/scanner"; -import fs from "node:fs/promises"; - -// CONSTANTS -const kPackagesToAnalyze = ["mocha", "cacache", "is-wsl"]; - -const payloads = await Promise.all( - kPackagesToAnalyze.map((name) => scanner.from(name)) -); - -const promises = []; -for (let i = 0; i < kPackagesToAnalyze.length; i++) { - const data = JSON.stringify(payloads[i], null, 2); - - promises.push(fs.writeFile(`${kPackagesToAnalyze[i]}.json`, data)); -} -await Promise.allSettled(promises); -``` - -## API - -See [types.ts](https://github.com/NodeSecure/scanner/blob/master/workspaces/scanner/src/types.ts) for a complete TypeScript definition. 
- -```ts -function workingDir( - location: string, - options?: Scanner.WorkingDirOptions, - logger?: Scanner.Logger -): Promise; -function from( - packageName: string, - options?: Scanner.FromOptions, - logger?: Scanner.Logger -): Promise; -function verify( - packageName?: string -): Promise; -``` - -`WorkingDirOptions` and `FromOptions` are described with the following TypeScript interfaces: - -```ts - -type WorkingDirOptions = Options & { - /** - * NPM runtime configuration (such as local .npmrc file) - * It is optionally used to fetch registry authentication tokens - */ - npmRcConfig?: Config; - /** - * Optional cache lookup called after reading the local package.json. - */ - cacheLookup?: ( - packageJSON: PackageJSON - ) => Promise; -}; - -type FromOptions = Omit & { - /** - * Optional cache lookup called after fetching the remote manifest. - */ - cacheLookup?: ( - manifest: pacote.AbbreviatedManifest & pacote.ManifestResult - ) => Promise; -}; - -interface Options { - /** - * Specifies the maximum depth to traverse for each root dependency. - * A value of 2 would mean only traversing deps and their immediate deps. - * - * @default Infinity - */ - readonly maxDepth?: number; - - /** - * Maximum concurrency to fetch and scan NPM tarballs - * @default 8 - */ - readonly maxConcurrency?: number; - - /** - * Includes development dependencies in the walk. - * Note that enabling this option can significantly increase I/O and processing time. - * - * @default false - */ - includeDevDeps?: boolean; +For full API documentation, options, and usage examples, see the [@nodesecure/scanner package README](./workspaces/scanner/README.md). - readonly registry?: string | URL; +## Workspaces - /** - * Enables the use of Arborist for rapidly walking over the dependency tree. - * When enabled, it triggers different methods based on the presence of `node_modules`: - * - `loadActual()` if `node_modules` is available. - * - `loadVirtual()` otherwise. 
- * - * When disabled, it will iterate on all dependencies by using pacote - */ - packageLock?: { - /** - * Fetches all manifests for additional metadata. - * - * @default false - */ - fetchManifest?: boolean; +- [@nodesecure/scanner](./workspaces/scanner) +- [@nodesecure/tarball](./workspaces/tarball) +- [@nodesecure/tree-walker](./workspaces/tree-walker) +- [@nodesecure/flags](./workspaces/flags) +- [@nodesecure/mama](./workspaces/mama) +- [@nodesecure/contact](./workspaces/contact) +- [@nodesecure/conformance](./workspaces/conformance) +- [@nodesecure/npm-types](./workspaces/npm-types) +- [@nodesecure/i18n](./workspaces/i18n) +- [@nodesecure/rc](./workspaces/rc) +- [@nodesecure/utils](./workspaces/utils) +- [@nodesecure/fs-walk](./workspaces/fs-walk) +- [@nodesecure/github](./workspaces/github) +- [@nodesecure/gitlab](./workspaces/gitlab) - /** - * Specifies the location of the manifest file for Arborist. - * This is typically the path to the `package.json` file. - */ - location: string; - }; +## 🐥 Contributors guide - highlight?: { - contacts?: Contact[]; - packages?: HighlightPackages; - identifiers?: string[]; - }; +If you are a developer **looking to contribute** to the project, you must first read the [CONTRIBUTING](./CONTRIBUTING.md) guide. - /** - * Vulnerability strategy name (npm, snyk, node) - * - * @default NONE - */ - readonly vulnerabilityStrategy?: Vuln.Strategy.Kind; +Once you have finished your development, check that the tests (and linter) are still good by running the following script: - /** - * Analyze root package. 
- * - * @default false for from() API - * @default true for cwd() API - */ - readonly scanRootNode?: boolean; -} +```bash +$ npm run check ``` -Additional APIs are available at: - -- [from](./workspaces/scanner/docs/from.md) -- [extractors](./workspaces/scanner/docs/extractors.md) -- [logger](./workspaces/scanner/docs/logger.md) - -## Workspaces - -Click on one of the links to access the documentation of the workspace: - -| name | package and link | -| --- | --- | -| tarball | [@nodesecure/tarball](./workspaces/tarball) | -| tree-walker | [@nodesecure/tree-walker](./workspaces/tree-walker) | -| flags | [@nodesecure/flags](./workspaces/flags) | -| mama | [@nodesecure/mama](./workspaces/mama) | -| contact | [@nodesecure/contact](./workspaces/contact) | -| conformance | [@nodesecure/conformance](./workspaces/conformance) | -| npm-types | [@nodesecure/npm-types](./workspaces/npm-types) | -| i18n | [@nodesecure/i18n](./workspaces/i18n) | -| rc | [@nodesecure/rc](./workspaces/rc) | -| utils | [@nodesecure/utils](./workspaces/utils) | -| fs-walk | [@nodesecure/fs-walk](./workspaces/fs-walk) | -| github | [@nodesecure/github](./workspaces/github) | -| gitlab | [@nodesecure/gitlab](./workspaces/gitlab) | +> [!CAUTION] +> In case you introduce a new feature or fix a bug, make sure to include tests for it as well. 
## Contributors ✨ diff --git a/package.json b/package.json index 34f512c7..ff775c9d 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "test": "npm run test --ws --if-present", "test-types": "npm run test-types --ws --if-present", "lint": "eslint workspaces", + "check": "npm run lint && npm run test && npm run test-types", "ci:publish": "changeset publish", "ci:version": "changeset version", "clean": "clear-ts-build" diff --git a/workspaces/scanner/ARCHITECTURE.md b/workspaces/scanner/ARCHITECTURE.md new file mode 100644 index 00000000..cebdf52b --- /dev/null +++ b/workspaces/scanner/ARCHITECTURE.md @@ -0,0 +1,175 @@ +# Scanner Architecture + +This document describes the internal mechanics of how the scanner works. Both the [`from()`](./docs/from.md) and [`workingDir()`](./docs/workingDir.md) APIs follow the same pipeline once a manifest is resolved. + +## Steps 0: Registry + +First, we load the correct registry URL. By default, this is the local registry. The command `npm config get registry` is run on the system. + +```js +const registry = options.registry ? new URL(options.registry).toString() : getLocalRegistryURL(); +``` + +## Steps 1: Resolving the manifest + +### `from()` + +The first step is to fetch what we call a `Manifest` on npm for a given Spec (e.g. `mypackage@x.x.x`). For this we use the npm library [pacote](https://github.com/npm/pacote#readme) that does all the work for us. + +```mermaid +graph LR; + A[From API]-->|Spec|B[Fetching Manifest]; + B-->|npm Manifest|C[Dependency Walker]; +``` + +It is important here to dig and learn some vocabulary related to npm: +- [Manifests](https://github.com/npm/pacote#manifests) +- [Packuments](https://github.com/npm/pacote#packuments) (We will see this later). +- Spec (This is the term used to refer to the package name with optional version or SemVer range.) 
+ +To simplify it, the first step is to check the package's existence on the remote registry and to get a structure similar to the `package.json`. + +### `workingDir()` + +Instead of fetching from the registry, the manifest is read directly from the local filesystem: + +```js +const packagePath = path.join(location, "package.json"); +const packageJSON = JSON.parse(await fs.readFile(packagePath, "utf-8")); +``` + +## Steps 1.1: Cache Lookup (optional) + +After resolving the manifest, if a `cacheLookup` function was provided in the options, it is called with the resolved manifest. If it returns a non-null `Payload`, that value is returned immediately and the dependency walker is **never executed**. This is useful to avoid redundant network I/O when a fresh result is already available. + +For `from()`, the callback receives the npm manifest: + +```ts +const payload = await scanner.from("fastify", { + cacheLookup: async(manifest) => { + const cached = await myCache.get(`${manifest.name}@${manifest.version}`); + return cached ?? null; + } +}); +``` + +For `workingDir()`, the callback receives the parsed `package.json` object instead: + +```ts +const payload = await scanner.workingDir(process.cwd(), { + cacheLookup: async(packageJSON) => { + const cached = await myCache.get(`${packageJSON.name}@${packageJSON.version}`); + return cached ?? null; + } +}); +``` + +```mermaid +graph LR; + A[Manifest resolved]-->C{cacheLookup?}; + C-->|Payload returned|D[Return cached result]; + C-->|null returned|E[Dependency Walker]; +``` + +## Steps 2: Dependency Walker + +This step aims to identify and walk through the package dependencies (that's why we call this the dependency walker). To do this, we retrieve the dependencies from the root manifest and start a recursive mechanism. 
+ +```mermaid +graph TD; + A[Dependency Walker]-->B[Fetch root dependencies]; + B-->C[Fetch Dependency Tree]; + C-->|RECURSIVE|C +``` + +> Note: at the beginning of this step we also create a temporary directory with `os.tmpdir()` + +### 2.1 Fetching root dependencies + +The first step is about fetching root dependencies that we previously retrieved from the manifest. + +At this point we create an iterator that will contain both normal packages and packages with a git resolution. Then we use [a package](https://github.com/fraxken/combine-async-iterators) to asynchronously combine AsyncIterators. + +> `from()` doesn't rely on [NPM arborist](https://www.npmjs.com/package/@npmcli/arborist) (it doesn't need a **package-lock.json** file or **node_modules** directory). When the `packageLock` option is set, Arborist is used for faster tree resolution. + +```js +const configRef = { exclude, maxDepth, parent }; +iterators = [ + ...iter.filter(customResolvers.entries(), ([, valueStr]) => isGitDependency(valueStr)) + .map(([depName, valueStr]) => searchDeepDependencies(depName, valueStr, configRef)), + ...iter.map(dependencies.entries(), ([name, ver]) => searchDeepDependencies(`${name}@${ver}`, null, configRef)) +]; + +for await (const dep of combineAsyncIterators({}, ...iterators)) { + yield dep; +} +``` + +### 2.2 Fetching the dependency tree recursively + +Everything is done using Async Generators, which make everything more simple by flattening dependencies. This step uses the same Manifest API from `pacote`. If a given package still has dependencies, the recursive function will continue to execute until there are no more (or it will stop if the maximum depth has been reached). + +Here is a simplified version of the Generator function: + +```js +export async function* searchDeepDependencies(packageName, gitURL, options) { + const { exclude, currDepth = 1, parent, maxDepth } = options; + + const { name, version } = await pacote.manifest(gitURL ?? 
packageName, { + ...NPM_TOKEN, + registry: getLocalRegistryURL(), + cache: `${os.homedir()}/.npm` + }); + const { dependencies, customResolvers } = mergeDependencies(pkg); + + const current = new Dependency(name, version, parent); + if (currDepth < maxDepth) { + const config = { + exclude, currDepth: currDepth + 1, parent: current, maxDepth + }; + + const depsNames = await Promise.all(iter.map(dependencies.entries(), getCleanDependencyName)); + for (const [fullName, cleanName, isLatest] of depsNames) { + yield* searchDeepDependencies(fullName, null, config); + } + } + + yield current; +} +``` + +Each time a dependency is retrieved, the code runs two separate analyses in parallel: +- Tarball scanning (using `pacote.extract`) +- Fetching additional metadata from the registry + +## Steps 3: Apply vulnera strategy + +The third step is to recover the list of vulnerabilities using the active [vulnera](https://github.com/NodeSecure/vulnera) strategy (or nothing if no strategy is chosen). + +```js +const { hydratePayloadDependencies, strategy } = await vuln.setStrategy(vulnerabilityStrategy); +await hydratePayloadDependencies(dependencies, { + useStandardFormat: true, + path: location +}); +``` + +For more information on how to create a strategy and how they operate, please [read the following documentation](https://github.com/NodeSecure/vulnera/blob/main/docs/adding_new_strategy.md). + +## Steps 4: Warnings and highlighted contacts + +In this step, we look for packages or authors potentially identified as problematic (by the person requesting the analysis or by the scanner itself). 
+ +```js +const { warnings, illuminateds } = await getDependenciesWarnings(dependencies); +payload.warnings = warnings; +payload.highlighted = { + contacts: illuminateds +}; +``` + +By default we show warnings for the following two packages: +- `@scarf/scarf` +- `iohook` + +Since the incident with [Faker](https://snyk.io/blog/npm-faker-package-open-source-libraries/) we also identify Marak's packages as dangerous. diff --git a/workspaces/scanner/README.md b/workspaces/scanner/README.md index 9936a9d3..ec299aa1 100644 --- a/workspaces/scanner/README.md +++ b/workspaces/scanner/README.md @@ -1,7 +1,173 @@ -

- @nodesecure/scanner -

-

- The documentation of this project is in the root README +# Nodesecure Scanner

+ +## 🔎 About + +**Scanner** is a Node.js static analysis tool that recursively walks dependency trees, scans npm tarballs with [JS-X-Ray](https://github.com/NodeSecure/js-x-ray), and enriches results with vulnerability data from [Vulnera](https://github.com/NodeSecure/vulnera). + +## 🚧 Requirements + +- [Node.js](https://nodejs.org/en/) version 22 or higher + +## 💃 Getting Started + +This package is available in the Node Package Repository and can be easily installed with [npm](https://docs.npmjs.com/getting-started/what-is-npm) or [yarn](https://yarnpkg.com). + +```bash +$ npm i @nodesecure/scanner +# or +$ yarn add @nodesecure/scanner +``` + +## 👀 Usage example + +```js +import * as scanner from "@nodesecure/scanner"; +import fs from "node:fs/promises"; + +// CONSTANTS +const kPackagesToAnalyze = ["mocha", "cacache", "is-wsl"]; + +const payloads = await Promise.all( + kPackagesToAnalyze.map((name) => scanner.from(name)) +); + +const promises = []; +for (let i = 0; i < kPackagesToAnalyze.length; i++) { + const data = JSON.stringify(payloads[i], null, 2); + + promises.push(fs.writeFile(`${kPackagesToAnalyze[i]}.json`, data)); +} +await Promise.allSettled(promises); +``` + +## 📚 API + +See [types.ts](https://github.com/NodeSecure/scanner/blob/master/workspaces/scanner/src/types.ts) for a complete TypeScript definition. 
+ +```ts +function workingDir( + location: string, + options?: Scanner.WorkingDirOptions, + logger?: Scanner.Logger +): Promise; +function from( + packageName: string, + options?: Scanner.FromOptions, + logger?: Scanner.Logger +): Promise; +function verify( + packageName?: string +): Promise; +``` + +`WorkingDirOptions` and `FromOptions` are described with the following TypeScript interfaces: + +```ts +type WorkingDirOptions = Options & { + /** + * NPM runtime configuration (such as local .npmrc file) + * It is optionally used to fetch registry authentication tokens + */ + npmRcConfig?: Config; + /** + * Optional cache lookup called after reading the local package.json. + */ + cacheLookup?: ( + packageJSON: PackageJSON + ) => Promise; +}; + +type FromOptions = Omit & { + /** + * Optional cache lookup called after fetching the remote manifest. + */ + cacheLookup?: ( + manifest: pacote.AbbreviatedManifest & pacote.ManifestResult + ) => Promise; +}; + +interface Options { + /** + * Specifies the maximum depth to traverse for each root dependency. + * A value of 2 would mean only traversing deps and their immediate deps. + * + * @default Infinity + */ + readonly maxDepth?: number; + + /** + * Maximum concurrency to fetch and scan NPM tarballs + * @default 8 + */ + readonly maxConcurrency?: number; + + /** + * Includes development dependencies in the walk. + * Note that enabling this option can significantly increase I/O and processing time. + * + * @default false + */ + includeDevDeps?: boolean; + + readonly registry?: string | URL; + + /** + * Enables the use of Arborist for rapidly walking over the dependency tree. + * When enabled, it triggers different methods based on the presence of `node_modules`: + * - `loadActual()` if `node_modules` is available. + * - `loadVirtual()` otherwise. + * + * When disabled, it will iterate on all dependencies by using pacote + */ + packageLock?: { + /** + * Fetches all manifests for additional metadata. 
+ * + * @default false + */ + fetchManifest?: boolean; + + /** + * Specifies the location of the manifest file for Arborist. + * This is typically the path to the `package.json` file. + */ + location: string; + }; + + highlight?: { + contacts?: Contact[]; + packages?: HighlightPackages; + identifiers?: string[]; + }; + + /** + * Vulnerability strategy name (npm, snyk, node) + * + * @default NONE + */ + readonly vulnerabilityStrategy?: Vuln.Strategy.Kind; + + /** + * Analyze root package. + * + * @default false for from() API + * @default true for workingDir() API + */ + readonly scanRootNode?: boolean; +} +``` + +Additional API documentation: + +- [from](./docs/from.md) +- [workingDir](./docs/workingDir.md) +- [verify](./docs/verify.md) +- [extractors](./docs/extractors.md) +- [logger](./docs/logger.md) +- [Architecture](./ARCHITECTURE.md) + +## License + +MIT diff --git a/workspaces/scanner/docs/from.md b/workspaces/scanner/docs/from.md index 49edae3a..0ef70411 100644 --- a/workspaces/scanner/docs/from.md +++ b/workspaces/scanner/docs/from.md @@ -1,193 +1,126 @@ -# From API +# from API -## Introduction +Analyze a package from a remote registry (npm by default) by recursively scanning its dependency tree. -This is an API made to analyze a package on a remote registry (by default on the npm registry). - -```js +```ts import * as scanner from "@nodesecure/scanner"; const payload = await scanner.from("fastify"); console.log(payload); ``` -It is also possible to provide options as a second argument. Here are two useful options for the command: +## Signature + +```ts +function from( + packageName: string, + options?: FromOptions, + logger?: Logger +): Promise<Payload> +``` + +- `packageName` — npm package name, with optional version or semver range (e.g. `"mocha"`, `"mocha@10"`, `"mocha@^10.0.0"`). +- `options` — optional configuration, see `FromOptions` below. +- `logger` — optional logger instance for tracking scan phases. See [logger](./logger.md). 
+ +## Options ```ts +export type FromOptions = Omit & { + /** + * Optional cache lookup called after fetching the remote manifest. + * If it returns a non-null Payload, the dependency walker is skipped entirely. + */ + cacheLookup?: ( + manifest: pacote.AbbreviatedManifest & pacote.ManifestResult + ) => Promise; +}; + export interface Options { /** - * Maximum tree depth + * Specifies the maximum depth to traverse for each root dependency. + * A value of 2 would mean only traversing deps and their immediate deps. + * * @default Infinity */ readonly maxDepth?: number; + /** - * Vulnerability strategy name (npm, snyk, node) - * @default NONE + * Maximum concurrency to fetch and scan NPM tarballs + * + * @default 8 */ - readonly vulnerabilityStrategy: Vuln.Strategy.Kind; + readonly maxConcurrency?: number; readonly registry?: string | URL; -} -``` - -In this guide we will see in depth how the from command has been implemented and how it works. - -## Steps 0: Registry - -First, we load the correct registry URL. By default, is the local registry. The command `npm config get registry` is run on the system. -```js -const registry = options.registry ? new URL(options.registry).toString() : getLocalRegistryURL(); -``` - -## Steps 1: Fetching the manifest - -The first step is to fetch what we call a `Manifest` on npm for a given Spec (eg `mypackage@x.x.x`). For this we use the npm library [pacote](https://github.com/npm/pacote#readme) that do all the work for us. - -```mermaid -graph LR; - A[From API]-->|Spec|B[Fetching Manifest]; - B-->|npm Manifest|C[Dependency Walker]; -``` - -It is important here to dig and learn some vocabulary related to npm: -- [Manifests](https://github.com/npm/pacote#manifests) -- [Packuments](https://github.com/npm/pacote#manifests) (We will see this later). -- Spec (This is the term used to refer to the package name with optional version or SemVer range.) 
- -To simplify it, the first step is to check the package's existence on the remote registry and to get a structure similar to the `package.json`. - -## Steps 1.5: Cache Lookup (optional) - -After fetching the manifest, if a `cacheLookup` function was provided in the options, it is called with the resolved manifest. If it returns a non-null `Payload`, that value is returned immediately and the dependency walker is **never executed**. This is useful to avoid redundant network I/O when a fresh result is already available. - -```ts -const payload = await scanner.from("fastify", { - cacheLookup: async(manifest) => { - const cached = await myCache.get(`${manifest.name}@${manifest.version}`); - return cached ?? null; - } -}); -``` - -The same `cacheLookup` mechanism is available on `workingDir`. In that case, the callback receives the parsed `package.json` object instead of the npm manifest: - -```ts -const payload = await scanner.workingDir(process.cwd(), { - cacheLookup: async(packageJSON) => { - const cached = await myCache.get(`${packageJSON.name}@${packageJSON.version}`); - return cached ?? null; - } -}); -``` - -```mermaid -graph LR; - A[From API]-->|Spec|B[Fetching Manifest]; - B-->C{cacheLookup?}; - C-->|Payload returned|D[Return cached result]; - C-->|null returned|E[Dependency Walker]; -``` - -## Steps 2: Dependency Walker -This step aims to identify and walk through the package dependencies (that's why we call this the dependency walker). To do this, we retrieve the dependencies from the root of the Manifest in step 1 and start a recursive mechanism. - -```mermaid -graph TD; - A[Dependency Walker]-->B[Fetch root dependencies]; - B-->C[Fetch Dependency Tree]; - C-->|RECURSIVE|C -``` - -> Note: at the beginning of this step we also create a temporary directory with `os.tmpdir()` - -### 2.1 fetching root dependencies - -The first step is about fetching root dependencies that we previously retrieved from the manifest in **step 1**. 
- -At this point we create an iterator that will contain both normal packages and packages with a git resolution. Then i use [a package i created](https://github.com/fraxken/combine-async-iterators) to Asynchronously combine AsyncIterators. - -> 👀 From API doesn't rely on [NPM arborist](https://www.npmjs.com/package/@npmcli/arborist) (it doesn't need a **package-lock.json** file or **node_modules** directory). - -```js -const configRef = { exclude, maxDepth, parent }; -iterators = [ - ...iter.filter(customResolvers.entries(), ([, valueStr]) => isGitDependency(valueStr)) - .map(([depName, valueStr]) => searchDeepDependencies(depName, valueStr, configRef)), - ...iter.map(dependencies.entries(), ([name, ver]) => searchDeepDependencies(`${name}@${ver}`, null, configRef)) -]; - -for await (const dep of combineAsyncIterators({}, ...iterators)) { - yield dep; -} -``` - -### 2.2 fetching dependency tree recursively - -Everything is done using Async Generators, which make everything more simple by flattening dependency. This step uses the same Manifest API from `pacote`. If a given package still has dependencies, the recursive function will continue to execute until there are no more (or it will stop if the maximum depth has been reached). - -Here is a simplified version of the Generator function: - -```js -export async function* searchDeepDependencies(packageName, gitURL, options) { - const { exclude, currDepth = 1, parent, maxDepth } = options; + /** + * Enables the use of Arborist for rapidly walking over the dependency tree. + * When enabled, it triggers different methods based on the presence of `node_modules`: + * - `loadActual()` if `node_modules` is available. + * - `loadVirtual()` otherwise. + * + * When disabled, it will iterate on all dependencies by using pacote. + */ + packageLock?: { + /** + * Fetches all manifests for additional metadata. 
+ * + * @default false + */ + fetchManifest?: boolean; + + /** + * Specifies the location of the manifest file for Arborist. + * This is typically the path to the `package.json` file. + */ + location: string; + }; + + highlight?: { + contacts?: Contact[]; + packages?: HighlightPackages; + identifiers?: string[]; + }; - const { name, version } = await pacote.manifest(gitURL ?? packageName, { - ...NPM_TOKEN, - registry: getLocalRegistryURL(), - cache: `${os.homedir()}/.npm` - }); - const { dependencies, customResolvers } = mergeDependencies(pkg); + /** + * Vulnerability strategy name (npm, snyk, node) + * + * @default NONE + */ + readonly vulnerabilityStrategy?: Kind; - const current = new Dependency(name, version, parent); - if (currDepth < maxDepth) { - const config = { - exclude, currDepth: currDepth + 1, parent: current, maxDepth - }; + /** + * Analyze root package. + * + * @default false + */ + readonly scanRootNode?: boolean; - const depsNames = await Promise.all(iter.map(dependencies.entries(), getCleanDependencyName)); - for (const [fullName, cleanName, isLatest] of depsNames) { - yield* searchDeepDependencies(fullName, null, config); - } - } + /** + * Enable verbose mode + * + * @default false + */ + isVerbose?: boolean; - yield current; + /** + * Enable worker threads for parallel tarball scanning. + * - `true` uses the default worker count (4) + * - `number` sets an explicit worker count + * + * @default false + */ + readonly workers?: boolean | number; } ``` -Each time a dependency is retrieved the code will run two separate analyses in parallel: -- Tarball scanning (using pacote.extract) -- Fetching additional metadata on the registry - -## Steps 3: Apply vulnera strategy - -The third step is to recover the list of vulnerabilities using the active [vulnera](https://github.com/NodeSecure/vulnera) strategy (or nothing if no strategy is chosen). 
- -```js -const { hydratePayloadDependencies, strategy } = await vuln.setStrategy(vulnerabilityStrategy); -await hydratePayloadDependencies(dependencies, { - useStandardFormat: true, - path: location -}); -``` - -For more information on how to create a strategy and how they operate, please [read the following documentation](https://github.com/NodeSecure/vulnera/blob/main/docs/adding_new_strategy.md). - -## Steps 4: Get warnings and illuminated contacts - -In this step, we look for packages or authors potentially identified as problematic (by the person requesting the analysis or us). - -```js -const { warnings, illuminateds } = await getDependenciesWarnings(dependencies); -payload.warnings = warnings; -payload.highlighted = { - contacts: illuminateds -}; -``` +## Return value -By default we show warnings for the following two packages: -- @scarf/scarf -- iohook +Returns `Promise<Payload>`. +See [types.ts](https://github.com/NodeSecure/scanner/blob/master/workspaces/scanner/src/types.ts) for the full type definition. -Since the incident with [Faker](https://snyk.io/blog/npm-faker-package-open-source-libraries/) we also identify Marak's packages as dangerous. +--- +> [!TIP] +> See [ARCHITECTURE.md](../ARCHITECTURE.md) for internal implementation details. diff --git a/workspaces/scanner/docs/verify.md b/workspaces/scanner/docs/verify.md new file mode 100644 index 00000000..115f9025 --- /dev/null +++ b/workspaces/scanner/docs/verify.md @@ -0,0 +1,35 @@ +# verify API + +Scan a single package tarball for security issues using [JS-X-Ray](https://github.com/NodeSecure/js-x-ray). 
+ +```ts +import * as scanner from "@nodesecure/scanner"; + +const result = await scanner.verify("fastify"); +console.log(result); +``` + +## Signature + +```ts +function verify( + packageName?: string +): Promise<ScannedPackageResult> +``` + +## Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `packageName` | `string` | — | Name (and optional version) of the npm package to verify. If omitted, the current working directory is scanned instead. | + +## Behavior + +- **With `packageName`:** Downloads the package tarball from the npm registry into a temporary directory, then scans its contents. +- **Without `packageName`:** Scans the current working directory (`process.cwd()`) directly. + +Unlike `from()` and `workingDir()`, `verify()` does **not** recursively walk the dependency tree. It scans the package files of a single package only. + +## Return value + +Returns `Promise<ScannedPackageResult>` from [`@nodesecure/tarball`](https://github.com/NodeSecure/tarball). The result contains the JS-X-Ray analysis of each file in the package, including detected warnings such as obfuscated code, unsafe regex, encoded literals, and more. diff --git a/workspaces/scanner/docs/workingDir.md b/workspaces/scanner/docs/workingDir.md new file mode 100644 index 00000000..5ad6e7e8 --- /dev/null +++ b/workspaces/scanner/docs/workingDir.md @@ -0,0 +1,142 @@ +# workingDir API + +Analyze a local project by reading its `package.json` and recursively scanning its dependency tree. + +```ts +import * as scanner from "@nodesecure/scanner"; + +const payload = await scanner.workingDir(process.cwd()); +console.log(payload); +``` + +## Signature + +```ts +function workingDir( + location?: string, + options?: WorkingDirOptions, + logger?: Logger +): Promise<Payload> +``` + +- `location` — path to the local project directory (must contain a `package.json`). Defaults to `process.cwd()`. +- `options` — optional configuration, see `WorkingDirOptions` below. 
+- `logger` — optional logger instance for tracking scan phases. See [logger](./logger.md). + +## Options + +> **Defaults specific to `workingDir()`:** `scanRootNode` is `true` and `includeDevDeps` is `false`. + +```ts +export type WorkingDirOptions = Options & { + /** + * NPM runtime configuration (such as local .npmrc file). + * It is optionally used to fetch registry authentication tokens. + */ + npmRcConfig?: Config; + + /** + * Optional cache lookup called after reading the local package.json. + * If it returns a non-null Payload, the dependency walker is skipped entirely. + */ + cacheLookup?: ( + packageJSON: PackageJSON + ) => Promise; +}; + +export interface Options { + /** + * Specifies the maximum depth to traverse for each root dependency. + * A value of 2 would mean only traversing deps and their immediate deps. + * + * @default Infinity + */ + readonly maxDepth?: number; + + /** + * Maximum concurrency to fetch and scan NPM tarballs + * + * @default 8 + */ + readonly maxConcurrency?: number; + + readonly registry?: string | URL; + + /** + * Enables the use of Arborist for rapidly walking over the dependency tree. + * When enabled, it triggers different methods based on the presence of `node_modules`: + * - `loadActual()` if `node_modules` is available. + * - `loadVirtual()` otherwise. + * + * When disabled, it will iterate on all dependencies by using pacote. + */ + packageLock?: { + /** + * Fetches all manifests for additional metadata. + * + * @default false + */ + fetchManifest?: boolean; + + /** + * Specifies the location of the manifest file for Arborist. + * This is typically the path to the `package.json` file. + */ + location: string; + }; + + highlight?: { + contacts?: Contact[]; + packages?: HighlightPackages; + identifiers?: string[]; + }; + + /** + * Includes development dependencies in the walk. + * Note that enabling this option can significantly increase I/O and processing time. 
+ * + * @default false + */ + includeDevDeps?: boolean; + + /** + * Vulnerability strategy name (npm, snyk, node) + * + * @default NONE + */ + readonly vulnerabilityStrategy?: Kind; + + /** + * Analyze root package. + * + * @default true + */ + readonly scanRootNode?: boolean; + + /** + * Enable verbose mode + * + * @default false + */ + isVerbose?: boolean; + + /** + * Enable worker threads for parallel tarball scanning. + * - `true` uses the default worker count (4) + * - `number` sets an explicit worker count + * + * @default false + */ + readonly workers?: boolean | number; +} +``` + +## Return value + +Returns `Promise<Payload>`. +See [types.ts](https://github.com/NodeSecure/scanner/blob/master/workspaces/scanner/src/types.ts) for the full type definition. + +--- + +> [!TIP] +> See [ARCHITECTURE.md](../ARCHITECTURE.md) for internal implementation details.