From b7a0c56f9af8b63c273575386c5fc5929feb16dc Mon Sep 17 00:00:00 2001 From: Jan Kadlec Date: Fri, 27 Feb 2026 15:14:52 +0100 Subject: [PATCH 1/2] ci(docs): add caching and parallel build scripts for doc generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Go module and Hugo resource caching to both hugo-build composite actions. Optimize Dockerfile with BuildKit cache mounts and better layer ordering. Add new scripts for parallel versioned doc generation (discover-versions, generate-single-version, assemble-versions) — not yet referenced by any workflow. jira: trivial risk: nonprod --- .github/actions/hugo-build-action/action.yaml | 13 ++++ .../hugo-build-versioned-action/action.yaml | 13 ++++ docs/Dockerfile | 22 ++++-- scripts/assemble-versions.sh | 55 +++++++++++++++ scripts/discover-versions.sh | 48 +++++++++++++ scripts/generate-single-version.sh | 67 +++++++++++++++++++ 6 files changed, 212 insertions(+), 6 deletions(-) create mode 100755 scripts/assemble-versions.sh create mode 100755 scripts/discover-versions.sh create mode 100755 scripts/generate-single-version.sh diff --git a/.github/actions/hugo-build-action/action.yaml b/.github/actions/hugo-build-action/action.yaml index 582b67638..8623c0870 100644 --- a/.github/actions/hugo-build-action/action.yaml +++ b/.github/actions/hugo-build-action/action.yaml @@ -12,6 +12,13 @@ runs: uses: actions/setup-go@v5 with: go-version: '>=1.20.1' + cache: false + - name: "Cache Go modules" + uses: actions/cache@v4 + with: + path: ~/go/pkg/mod + key: go-mod-${{ hashFiles('docs/go.sum') }} + restore-keys: go-mod- - name: "Setup Node" uses: actions/setup-node@v4 with: @@ -28,6 +35,12 @@ runs: working-directory: ./docs run: | npm ci + - name: "Cache Hugo resources" + uses: actions/cache@v4 + with: + path: docs/resources/_gen + key: hugo-resources-${{ hashFiles('docs/go.sum', 'docs/config/**') }} + restore-keys: hugo-resources- - name: "Build documentation" 
working-directory: ./docs env: diff --git a/.github/actions/hugo-build-versioned-action/action.yaml b/.github/actions/hugo-build-versioned-action/action.yaml index dfbd721d4..7ad1c1b66 100644 --- a/.github/actions/hugo-build-versioned-action/action.yaml +++ b/.github/actions/hugo-build-versioned-action/action.yaml @@ -35,6 +35,13 @@ runs: - uses: actions/setup-go@v5 with: go-version: '>=1.20.1' + cache: false + - name: "Cache Go modules" + uses: actions/cache@v4 + with: + path: ~/go/pkg/mod + key: go-mod-${{ hashFiles('docs/go.sum') }} + restore-keys: go-mod- - name: "Setup Node" uses: actions/setup-node@v4 with: @@ -59,6 +66,12 @@ runs: wget https://raw.githubusercontent.com/gooddata/gooddata-python-sdk/master/scripts/generate.sh chmod +x ./generate.sh ./generate.sh ${{ inputs.fetch-from }} master + - name: "Cache Hugo resources" + uses: actions/cache@v4 + with: + path: docs/resources/_gen + key: hugo-resources-${{ hashFiles('docs/go.sum', 'docs/config/**') }} + restore-keys: hugo-resources- - name: "Build documentation" working-directory: ./docs env: diff --git a/docs/Dockerfile b/docs/Dockerfile index eac82ebdd..1d2fc2da5 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,15 +1,21 @@ +# syntax=docker/dockerfile:1 FROM python:3.14-slim AS builder RUN apt-get update && apt-get install -y git make curl +# Install Python deps first (changes rarely) for better layer caching. +# Copy only dependency manifests and package source before installing. 
COPY scripts/script-requirements.txt /scripts/script-requirements.txt -COPY docs docs -COPY scripts/docs/ /docs COPY gooddata-api-client /gooddata-api-client COPY packages/gooddata-sdk /gooddata-sdk COPY packages/gooddata-pandas /gooddata-pandas -RUN pip install --no-cache-dir -r /scripts/script-requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r /scripts/script-requirements.txt + +# Copy source (docs content changes most frequently, scripts less so) +COPY docs docs +COPY scripts/docs/ /docs WORKDIR /docs @@ -21,16 +27,20 @@ RUN python json_builder.py && \ FROM node:20.18.0-bookworm-slim -COPY --from=builder /docs /docs - RUN apt-get update && \ apt-get install -y git make golang-go curl && \ npm install -g hugo-extended@0.117.0 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* +COPY --from=builder /docs /docs + WORKDIR /docs -RUN npm install && \ + +# Use BuildKit cache mounts so npm/Go package downloads survive layer rebuilds +RUN --mount=type=cache,target=/root/.npm \ + --mount=type=cache,target=/root/go/pkg/mod \ + npm install && \ hugo mod get # accessible on http://localhost:1313/latest/ diff --git a/scripts/assemble-versions.sh b/scripts/assemble-versions.sh new file mode 100755 index 000000000..c7a963714 --- /dev/null +++ b/scripts/assemble-versions.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# (C) 2026 GoodData Corporation +# Assembles version artifacts into the final versioned_docs structure. +# Run from the docs/ directory after downloading version artifacts. +# +# Expects: +# - versioned_docs-raw/ directory with version-* subdirectories (from artifact download) +# - content/en/ directory with master branch content (from current checkout) +set -e + +content_dir=versioned_docs + +# Start with clean versioned_docs +rm -rf "$content_dir" +mkdir -p "$content_dir" + +# 1. Copy master/current branch content (provides versions page and root structure) +echo "Copying master content from content/en/" +cp -r content/en/. 
"$content_dir/" + +# 2. Move version artifacts from download directory into versioned_docs +if [ -d "versioned_docs-raw" ]; then + for dir in versioned_docs-raw/version-*/; do + [ -d "$dir" ] || continue + section=$(basename "$dir" | sed 's/^version-//') + echo "Installing version artifact: $section" + # Remove any existing content for this section (from master copy) + rm -rf "${content_dir:?}/$section" + mv "$dir" "$content_dir/$section" + done + rm -rf versioned_docs-raw +fi + +# 3. Remove master's "latest" directory — it will be replaced by the highest numbered version +echo "Removing master's latest directory" +rm -rf "${content_dir:?}/latest" + +# 4. Find the highest numbered version and promote it to "latest" +highest_version=$(ls -1 "./$content_dir/" | grep -E '^[0-9]+$' | sort -V | tail -n 1) + +if [ -n "$highest_version" ]; then + echo "Promoting version $highest_version to /latest" + mv -f "./$content_dir/$highest_version" "./$content_dir/latest" + + # Update version references in links.json + if [ -f "./$content_dir/latest/links.json" ]; then + sed "s|${highest_version}|latest|g" "./$content_dir/latest/links.json" > temp_links.json + mv temp_links.json "./$content_dir/latest/links.json" + fi +else + echo "WARNING: No numbered version directory found to promote to latest" +fi + +echo "Assembly complete. Contents of $content_dir/:" +ls -la "$content_dir/" diff --git a/scripts/discover-versions.sh b/scripts/discover-versions.sh new file mode 100755 index 000000000..0236a537a --- /dev/null +++ b/scripts/discover-versions.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# (C) 2026 GoodData Corporation +# Discovers release versions for parallel doc generation. +# Outputs a JSON array suitable for GitHub Actions matrix strategy. +# Usage: discover-versions.sh [remote_name] [num_versions] +set -e + +remote_name=${1:-origin} +num_versions=${2:-4} + +git fetch "$remote_name" 2>/dev/null + +# Build a map of section -> branch, keeping only the latest minor per section. 
+# Associative arrays preserve last-write-wins semantics, matching the original
+# generate.sh behavior where later branches overwrite earlier ones.
+declare -A section_map
+declare -a section_order
+
+while IFS= read -r vers; do
+    section="${vers%.*}"
+    if [ -z "${section_map[$section]+x}" ]; then
+        section_order+=("$section")
+    fi
+    # Later (higher minor) versions overwrite earlier ones for the same major
+    section_map["$section"]="rel/$vers"
+done < <(git branch -rl "$remote_name/rel/*" | sed 's|.*/rel/||' | grep -E '^[0-9]+\.[0-9]+$' | sort -t. -k1,1n -k2,2n | tail -n"$num_versions")
+
+# Add dev branch if it exists
+if git branch -rl "$remote_name/rel/dev" | grep -q "rel/dev"; then
+    if [ -z "${section_map[dev]+x}" ]; then
+        section_order+=("dev")
+    fi
+    section_map["dev"]="rel/dev"
+fi
+
+# Output as JSON array
+echo -n "["
+first=true
+for section in "${section_order[@]}"; do
+    branch="${section_map[$section]}"
+    if [ "$first" = true ]; then
+        first=false
+    else
+        echo -n ","
+    fi
+    echo -n "{\"branch\":\"$branch\",\"section\":\"$section\"}"
+done
+echo "]"
diff --git a/scripts/generate-single-version.sh b/scripts/generate-single-version.sh
new file mode 100755
index 000000000..712550314
--- /dev/null
+++ b/scripts/generate-single-version.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# (C) 2026 GoodData Corporation
+# Generates documentation for a single version branch.
+# This is the per-version logic extracted from generate.sh for parallel execution.
+#
+# Usage: generate-single-version.sh <branch> <section>
+# Example: generate-single-version.sh origin/rel/1.3 1
+#
+# Prerequisites:
+#   - Repository checked out with the target branch fetched
+#   - Python environment with script-requirements.txt installed from the TARGET branch
+set -e
+
+branch=$1
+section=$2
+
+if [ -z "$branch" ] || [ -z "$section" ]; then
+    echo "Usage: generate-single-version.sh <branch> <section>"
+    echo "Example: generate-single-version.sh origin/rel/1.3 1"
+    exit 1
+fi
+
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$REPO_ROOT/docs"
+
+content_dir=versioned_docs
+
+mkdir -p "$content_dir/$section"
+
+# Determine source content path on the branch
+if git ls-tree -d "$branch" -- "content/en/docs" 2>/dev/null | grep -q "content/en/docs"; then
+    src_section=docs
+else
+    src_section=latest
+fi
+
+# Extract documentation content from the branch
+echo "Extracting docs from $branch for section $section (src=$src_section)"
+# strip-components=3 removes content/en/{src_section} prefix
+git archive "$branch" "content/en/$src_section" | tar xf - -C "$content_dir/$section" \
+    --strip-components=3 "content/en/$src_section"
+
+# Generate API reference if json_builder.py exists on the branch
+API_GEN_FILE="$branch:scripts/docs/json_builder.py"
+if git cat-file -e "$API_GEN_FILE" 2>/dev/null; then
+    echo "Generating API ref for section $section..."
+
+    # Get api_spec.toml from the branch
+    if git ls-tree --name-only "$branch" | grep -q "^api_spec.toml$"; then
+        git checkout "$branch" -- api_spec.toml
+    else
+        echo "No api_spec.toml on $branch, removing local copy"
+        rm -f api_spec.toml
+    fi
+
+    # Generate API introspection data from this version's SDK
+    python3 ../scripts/docs/json_builder.py
+    mv -f data.json "$content_dir/$section/"
+
+    # Generate API reference markdown files
+    python3 ../scripts/docs/python_ref_builder.py api_spec.toml \
+        "./$content_dir/$section/data.json" "$section" "$content_dir"
+else
+    echo "No json_builder.py on $branch, skipping API ref generation"
+fi
+
+echo "Done: section $section from $branch"

From 822fbbc4a53a2fff994c7b32297d71ffb6b80715 Mon Sep 17 00:00:00 2001
From: Jan Kadlec
Date: Fri, 27 Feb 2026 15:15:39 +0100
Subject: [PATCH 2/2] ci(docs): add v2 draft workflow for parallel doc build validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add netlify-deploy-v2.yaml that runs the
new parallel matrix architecture (discover → generate per-version →
assemble + Hugo build) but deploys as a Netlify draft (no --prod flag).
This allows validating the new build pipeline side-by-side with the
existing production workflow without any risk to the live site.

jira: trivial
risk: nonprod
---
 .github/workflows/netlify-deploy-v2.yaml | 134 +++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 .github/workflows/netlify-deploy-v2.yaml

diff --git a/.github/workflows/netlify-deploy-v2.yaml b/.github/workflows/netlify-deploy-v2.yaml
new file mode 100644
index 000000000..c0e8be1c5
--- /dev/null
+++ b/.github/workflows/netlify-deploy-v2.yaml
@@ -0,0 +1,134 @@
+name: Netlify Deploy V2 (Draft)
+on:
+  workflow_dispatch:
+
+jobs:
+  # Job 1: Discover which version branches to build
+  discover-versions:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.versions.outputs.matrix }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Fetch remote refs
+        run: git fetch origin
+      - name: Discover versions
+        id: versions
+        run: |
+          MATRIX=$(bash scripts/discover-versions.sh origin 4)
+          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
+          echo "Discovered versions: $MATRIX"
+
+  # Job 2: Generate docs for each version (matrix — runs in parallel across versions)
+  # Each version gets its own runner with that branch's SDK installed.
+  # Per-version caching means released branches (which rarely change) are instant cache hits.
+  generate-version:
+    needs: [discover-versions]
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        version: ${{ fromJson(needs.discover-versions.outputs.matrix) }}
+      fail-fast: false
+    steps:
+      - name: Get branch commit SHA
+        id: sha
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          SHA=$(gh api "repos/${{ github.repository }}/git/ref/heads/${{ matrix.version.branch }}" -q '.object.sha')
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+          echo "Branch ${{ matrix.version.branch }} -> section ${{ matrix.version.section }} (SHA: $SHA)"
+      - name: Cache version docs
+        id: cache
+        uses: actions/cache@v4
+        with:
+          path: docs/versioned_docs/${{ matrix.version.section }}
+          key: version-docs-${{ matrix.version.section }}-${{ steps.sha.outputs.sha }}
+      - name: Checkout
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions/checkout@v4
+      - name: Fetch target branch
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: git fetch origin ${{ matrix.version.branch }}
+      - name: Checkout branch packages
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: |
+          git checkout origin/${{ matrix.version.branch }} -- gooddata-api-client/ packages/gooddata-sdk/ packages/gooddata-pandas/ scripts/script-requirements.txt
+      - name: Setup Python
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: ".python-version"
+          cache: 'pip'
+          cache-dependency-path: scripts/script-requirements.txt
+      - name: Install Dependencies
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r scripts/script-requirements.txt
+      - name: Generate version docs
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: bash scripts/generate-single-version.sh "origin/${{ matrix.version.branch }}" "${{ matrix.version.section }}"
+      - name: Upload version artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: version-${{ matrix.version.section }}
+          path: docs/versioned_docs/${{ matrix.version.section }}
+          retention-days: 1
+
+  # Job 3: Assemble all versions, build Hugo site, and deploy to Netlify (draft)
+  build-and-deploy:
+    needs: [generate-version]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Setup GO
+        uses: actions/setup-go@v5
+        with:
+          go-version: '>=1.20.1'
+          cache: false
+      - name: Cache Go modules
+        uses: actions/cache@v4
+        with:
+          path: ~/go/pkg/mod
+          key: go-mod-${{ hashFiles('docs/go.sum') }}
+          restore-keys: go-mod-
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: 'npm'
+          cache-dependency-path: docs/package-lock.json
+      - name: Install Hugo
+        run: npm install -g hugo-extended@0.117.0
+      - name: Install Dependencies
+        working-directory: ./docs
+        run: npm ci
+      - name: Download version artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: version-*
+          path: docs/versioned_docs-raw/
+      - name: Assemble versioned docs
+        working-directory: ./docs
+        run: bash ../scripts/assemble-versions.sh
+      - name: Cache Hugo resources
+        uses: actions/cache@v4
+        with:
+          path: docs/resources/_gen
+          key: hugo-resources-${{ hashFiles('docs/go.sum', 'docs/config/**') }}
+          restore-keys: hugo-resources-
+      - name: Build documentation
+        working-directory: ./docs
+        run: hugo --minify --baseURL https://www.gooddata.com/docs/python-sdk
+      - name: Publish (draft)
+        uses: netlify/actions/cli@master
+        with:
+          args: deploy -d docs/public
+        env:
+          NETLIFY_SITE_ID: 93e23db0-d31a-4a12-801a-b9479ffef486 # Not a secret
+          NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}