From cd3e8457d35eade5aa5ea73f5fad20ea4aaa7900 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 2 Apr 2026 12:47:51 -0700 Subject: [PATCH 1/4] feat: add LightRAG + DocumentDB playground MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a self-contained playground demonstrating LightRAG (graph-based RAG engine) using DocumentDB as its MongoDB-compatible storage backend. Includes: - Helm chart with init-container that patches LightRAG for DocumentDB compatibility (skips unsupported createIndex with collation) - Ollama deployment manifest for in-cluster LLM inference - Automated deploy.sh and cleanup.sh scripts - Comprehensive README with architecture, setup, configuration, DocumentDB compatibility matrix, and troubleshooting guide Storage mapping: - KV, Graph, DocStatus → MongoKVStorage/MongoGraphStorage (DocumentDB) - Vectors → NanoVectorDBStorage (local, since DocumentDB lacks $vectorSearch) Tested end-to-end on Kind with DocumentDB Kubernetes Operator. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- documentdb-playground/lightrag/README.md | 208 ++++++++++++++++++ .../lightrag/helm/lightrag-values.yaml | 58 +++++ .../lightrag/helm/lightrag/Chart.yaml | 6 + .../helm/lightrag/templates/NOTES.txt | 20 ++ .../helm/lightrag/templates/_helpers.tpl | 27 +++ .../helm/lightrag/templates/deployment.yaml | 135 ++++++++++++ .../lightrag/helm/lightrag/templates/pvc.yaml | 27 +++ .../helm/lightrag/templates/secret.yaml | 10 + .../helm/lightrag/templates/service.yaml | 15 ++ .../lightrag/helm/lightrag/values.yaml | 53 +++++ .../lightrag/helm/ollama.yaml | 56 +++++ .../lightrag/scripts/cleanup.sh | 22 ++ .../lightrag/scripts/deploy.sh | 77 +++++++ 13 files changed, 714 insertions(+) create mode 100644 documentdb-playground/lightrag/README.md create mode 100644 documentdb-playground/lightrag/helm/lightrag-values.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/Chart.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/NOTES.txt create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/_helpers.tpl create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/deployment.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/pvc.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/secret.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/templates/service.yaml create mode 100644 documentdb-playground/lightrag/helm/lightrag/values.yaml create mode 100644 documentdb-playground/lightrag/helm/ollama.yaml create mode 100755 documentdb-playground/lightrag/scripts/cleanup.sh create mode 100755 documentdb-playground/lightrag/scripts/deploy.sh diff --git a/documentdb-playground/lightrag/README.md b/documentdb-playground/lightrag/README.md new file mode 100644 index 00000000..0cbe8c9b --- /dev/null +++ b/documentdb-playground/lightrag/README.md @@ -0,0 +1,208 @@ +# LightRAG 
with DocumentDB + +This playground deploys [LightRAG](https://github.com/HKUDS/LightRAG) — a graph-based Retrieval-Augmented Generation (RAG) engine — using DocumentDB as its MongoDB-compatible storage backend. + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Kubernetes Cluster │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ LightRAG │───▶│ Ollama │ │ +│ │ (RAG Engine)│ │ (LLM + Embed)│ │ +│ └──────┬───────┘ └──────────────┘ │ +│ │ │ +│ │ MongoDB wire protocol │ +│ ▼ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ DocumentDB │───▶│ PostgreSQL │ │ +│ │ (Gateway) │ │ (CNPG) │ │ +│ └──────────────┘ └──────────────┘ │ +│ │ +│ Storage mapping: │ +│ ├─ KV storage → MongoKVStorage (DocumentDB) │ +│ ├─ Graph storage → MongoGraphStorage (DocumentDB) │ +│ ├─ Doc status → MongoDocStatusStorage (DocDB) │ +│ └─ Vector storage → NanoVectorDBStorage (local) │ +└─────────────────────────────────────────────────────┘ +``` + +LightRAG stores knowledge graph nodes, edges, document metadata, and LLM response caches in DocumentDB collections. Vector embeddings use local file-based storage because DocumentDB does not support the Atlas `$vectorSearch` operator. + +## Prerequisites + +- A running Kubernetes cluster with the DocumentDB operator installed +- A healthy DocumentDB instance (see [Quick Start](../../docs/operator-public-documentation/preview/index.md)) +- [Helm](https://helm.sh/docs/intro/install/) v3.0+ +- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) configured for your cluster + +## Quick Start + +```bash +# Deploy everything (Ollama + LightRAG) with default settings +./scripts/deploy.sh + +# Clean up +./scripts/cleanup.sh +``` + +## Step-by-Step Deployment + +### 1. Verify DocumentDB is Running + +```bash +kubectl get documentdb --all-namespaces +# Expected: STATUS = "Cluster in healthy state" +``` + +### 2. 
Deploy Ollama (LLM Backend) + +```bash +kubectl apply -f helm/ollama.yaml +kubectl wait --for=condition=ready pod -l app=ollama -n lightrag --timeout=120s + +# Pull models (required on first run) +OLLAMA_POD=$(kubectl get pod -l app=ollama -n lightrag -o jsonpath='{.items[0].metadata.name}') +kubectl exec -n lightrag "$OLLAMA_POD" -- ollama pull nomic-embed-text +kubectl exec -n lightrag "$OLLAMA_POD" -- ollama pull qwen2.5:3b +``` + +### 3. Deploy LightRAG + +Edit `helm/lightrag-values.yaml` to set your DocumentDB connection string, then: + +```bash +helm upgrade --install lightrag helm/lightrag \ + -n lightrag \ + -f helm/lightrag-values.yaml +``` + +### 4. Access the WebUI + +```bash +kubectl port-forward svc/lightrag 9621:9621 -n lightrag +# Open http://localhost:9621 +``` + +### 5. Test Document Ingestion + +```bash +# Insert a document +curl -X POST http://localhost:9621/documents/text \ + -H "Content-Type: application/json" \ + -d '{"text": "Your document text here..."}' + +# Query with graph-enhanced RAG +curl -X POST http://localhost:9621/query \ + -H "Content-Type: application/json" \ + -d '{"query": "What is this document about?", "mode": "hybrid"}' +``` + +## Configuration + +### DocumentDB Connection + +Update the `MONGO_URI` in `helm/lightrag-values.yaml`: + +```yaml +env: + MONGO_URI: "mongodb://<username>:<password>@<service-name>.<namespace>.svc.cluster.local:<port>/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&tlsAllowInvalidCertificates=true" + MONGO_DATABASE: "LightRAG" +``` + +To get your connection details: + +```bash +# Get the gateway service +kubectl get svc -n | grep documentdb-service + +# Get credentials +kubectl get secret documentdb-credentials -n \ + -o jsonpath='{.data.username}' | base64 -d +kubectl get secret documentdb-credentials -n \ + -o jsonpath='{.data.password}' | base64 -d +``` + +### LLM Configuration + +The default configuration uses [Ollama](https://ollama.com) with `qwen2.5:3b` for text generation and `nomic-embed-text` for embeddings. 
To use OpenAI instead: + +```yaml +env: + LLM_BINDING: openai + LLM_MODEL: gpt-4o-mini + LLM_BINDING_API_KEY: "sk-..." + EMBEDDING_BINDING: openai + EMBEDDING_MODEL: text-embedding-3-small + EMBEDDING_DIM: "1536" + EMBEDDING_BINDING_API_KEY: "sk-..." +``` + +### Storage Configuration + +| Storage Type | Backend | Notes | +|---|---|---| +| KV Storage | `MongoKVStorage` | Documents, chunks, entities, relations | +| Graph Storage | `MongoGraphStorage` | Knowledge graph nodes and edges | +| Doc Status | `MongoDocStatusStorage` | Document processing state | +| Vector Storage | `NanoVectorDBStorage` | Local file-based (PVC) | + +> **Why not MongoVectorDBStorage?** DocumentDB does not support the MongoDB Atlas `$vectorSearch` aggregation operator required by `MongoVectorDBStorage`. The file-based `NanoVectorDBStorage` works without limitations. + +## DocumentDB Compatibility + +LightRAG's MongoDB storage assumes MongoDB Atlas features. This playground includes an init container that patches the LightRAG code for DocumentDB compatibility: + +| Feature | MongoDB Atlas | DocumentDB | Workaround | +|---|---|---|---| +| `$vectorSearch` | ✅ | ❌ | Use NanoVectorDBStorage | +| `$listSearchIndexes` | ✅ | ❌ | Graceful fallback to regex | +| `createIndex` with collation | ✅ | ❌ | Skip collation indexes | +| `createIndex` (secondary) | ✅ | Hangs | Skip via init-container patch | +| Basic CRUD operations | ✅ | ✅ | Works natively | +| Aggregation pipelines | ✅ | ✅ | `$group`, `$match`, `$sort` work | + +The init container applies these patches automatically — no manual configuration is needed. 
+ +## Verified Operations + +The following LightRAG operations have been tested with DocumentDB: + +- ✅ Document ingestion and chunking +- ✅ Entity and relationship extraction (via LLM) +- ✅ Knowledge graph storage and traversal +- ✅ LLM response caching +- ✅ Naive, local, global, and hybrid RAG queries +- ✅ Document status tracking +- ✅ WebUI for graph visualization + +## Troubleshooting + +### LightRAG pod stuck in `Running` but not `Ready` + +The most common cause is `createIndex` hanging on DocumentDB. Verify the init container patch applied correctly: + +```bash +POD=$(kubectl get pod -l app.kubernetes.io/name=lightrag -n lightrag -o jsonpath='{.items[0].metadata.name}') +kubectl logs -n lightrag "$POD" -c patch-for-documentdb +# Should show: "DocumentDB compatibility patches applied" +``` + +### Cannot connect to DocumentDB + +Verify the gateway service is reachable from the lightrag namespace: + +```bash +kubectl run mongo-test --rm -it --restart=Never -n lightrag --image=mongo:7 \ + --command -- mongosh "<connection-string>" --eval 'db.adminCommand({ping:1})' +``` + +### LLM errors during document processing + +Check that Ollama has the models pulled: + +```bash +OLLAMA_POD=$(kubectl get pod -l app=ollama -n lightrag -o jsonpath='{.items[0].metadata.name}') +kubectl exec -n lightrag "$OLLAMA_POD" -- ollama list +``` diff --git a/documentdb-playground/lightrag/helm/lightrag-values.yaml b/documentdb-playground/lightrag/helm/lightrag-values.yaml new file mode 100644 index 00000000..24dd33f2 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag-values.yaml @@ -0,0 +1,58 @@ +# LightRAG configuration for DocumentDB backend. +# Update MONGO_URI with your DocumentDB connection string before deploying. 
+replicaCount: 1 + +image: + repository: ghcr.io/hkuds/lightrag + tag: latest + imagePullSecrets: [] + +updateStrategy: + type: Recreate + +service: + type: ClusterIP + port: 9621 + +resources: + limits: + cpu: 1000m + memory: 2Gi + requests: + cpu: 500m + memory: 1Gi + +persistence: + enabled: true + ragStorage: + size: 5Gi + inputs: + size: 2Gi + +env: + HOST: "0.0.0.0" + PORT: "9621" + WEBUI_TITLE: "LightRAG + DocumentDB" + WEBUI_DESCRIPTION: "Graph RAG backed by DocumentDB Kubernetes Operator" + # LLM - Ollama (in-cluster) + LLM_BINDING: ollama + LLM_MODEL: qwen2.5:3b + LLM_BINDING_HOST: "http://ollama.lightrag.svc.cluster.local:11434" + LLM_BINDING_API_KEY: "" + # Embedding - Ollama (in-cluster) + EMBEDDING_BINDING: ollama + EMBEDDING_MODEL: nomic-embed-text + EMBEDDING_DIM: "768" + EMBEDDING_BINDING_API_KEY: "" + # Storage - DocumentDB for KV/Graph/DocStatus, local for vectors + LIGHTRAG_KV_STORAGE: MongoKVStorage + LIGHTRAG_VECTOR_STORAGE: NanoVectorDBStorage + LIGHTRAG_GRAPH_STORAGE: MongoGraphStorage + LIGHTRAG_DOC_STATUS_STORAGE: MongoDocStatusStorage + # DocumentDB connection — update these values for your cluster + MONGO_URI: "mongodb://admin:MySecurePassword123@documentdb-service-my-cluster.documentdb-demo.svc.cluster.local:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&tlsAllowInvalidCertificates=true" + MONGO_DATABASE: "LightRAG" + +envFrom: + configmaps: [] + secrets: [] diff --git a/documentdb-playground/lightrag/helm/lightrag/Chart.yaml b/documentdb-playground/lightrag/helm/lightrag/Chart.yaml new file mode 100644 index 00000000..f716977c --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: lightrag +description: LightRAG graph-based RAG engine with DocumentDB backend +type: application +version: 0.1.0 +appVersion: "1.4.13" diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/NOTES.txt b/documentdb-playground/lightrag/helm/lightrag/templates/NOTES.txt 
new file mode 100644 index 00000000..f44bf8ae --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/NOTES.txt @@ -0,0 +1,20 @@ +LightRAG has been deployed with DocumentDB backend. + +1. Get the application URL: +{{- if eq .Values.service.type "ClusterIP" }} + kubectl port-forward svc/{{ include "lightrag.fullname" . }} -n {{ .Release.Namespace }} 9621:{{ .Values.service.port }} + Then visit: http://localhost:9621 +{{- end }} + +2. Check health: + curl http://localhost:9621/health + +3. Insert a document: + curl -X POST http://localhost:9621/documents/text \ + -H "Content-Type: application/json" \ + -d '{"text": "Your text here"}' + +4. Query the knowledge graph: + curl -X POST http://localhost:9621/query \ + -H "Content-Type: application/json" \ + -d '{"query": "Your question", "mode": "hybrid"}' diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/_helpers.tpl b/documentdb-playground/lightrag/helm/lightrag/templates/_helpers.tpl new file mode 100644 index 00000000..0621e8cf --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/_helpers.tpl @@ -0,0 +1,27 @@ +{{- define "lightrag.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "lightrag.fullname" -}} +{{- default .Release.Name .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "lightrag.labels" -}} +app.kubernetes.io/name: {{ include "lightrag.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "lightrag.selectorLabels" -}} +app.kubernetes.io/name: {{ include "lightrag.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "lightrag.envContent" -}} +{{- $first := true -}} +{{- range $key, $val := .Values.env -}} +{{- if not $first -}}{{- "\n" -}}{{- end -}} +{{- $first = false -}} +{{ $key }}={{ $val }} +{{- end -}} +{{- end -}} diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/deployment.yaml b/documentdb-playground/lightrag/helm/lightrag/templates/deployment.yaml new file mode 100644 index 00000000..26bff204 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/deployment.yaml @@ -0,0 +1,135 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "lightrag.fullname" . }} + labels: + {{- include "lightrag.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "lightrag.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include "lightrag.envContent" . | sha256sum }} + labels: + {{- include "lightrag.selectorLabels" . | nindent 8 }} + spec: + # Init container patches LightRAG's MongoDB storage layer for DocumentDB + # compatibility. DocumentDB does not support createIndex with collation, + # $listSearchIndexes, or secondary index creation (it hangs). The patch + # stubs out these calls so initialization completes cleanly. 
+ initContainers: + - name: patch-for-documentdb + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + command: ["/app/.venv/bin/python3", "-B", "-c"] + args: + - | + import os, glob + + targets = [ + "/app/lightrag/kg/mongo_impl.py", + "/app/.venv/lib/python3.12/site-packages/lightrag/kg/mongo_impl.py", + ] + for fp in targets: + if not os.path.exists(fp): + continue + with open(fp) as f: + content = f.read() + if "Skipping index creation (DocumentDB)" in content: + print(f"Already patched: {fp}") + continue + patched = content + patched = patched.replace( + "async def create_and_migrate_indexes_if_not_exists(self):", + "async def create_and_migrate_indexes_if_not_exists(self):\n" + " logger.info(f'[{self.workspace}] Skipping index creation (DocumentDB)')\n" + " return\n" + " async def _orig_create_indexes(self):", + ) + patched = patched.replace( + "async def create_search_index_if_not_exists(self):", + "async def create_search_index_if_not_exists(self):\n" + " logger.info(f'[{self.workspace}] Skipping search index (DocumentDB)')\n" + " return\n" + " async def _orig_create_search_index(self):", + ) + patched = patched.replace( + "async def create_vector_index_if_not_exists(self):", + "async def create_vector_index_if_not_exists(self):\n" + " logger.info('Skipping vector index (DocumentDB)')\n" + " return\n" + " async def _orig_create_vector_index(self):", + ) + with open(fp, "w") as f: + f.write(patched) + cache_dir = os.path.join(os.path.dirname(fp), "__pycache__") + for cf in glob.glob(os.path.join(cache_dir, "mongo_impl*")): + os.remove(cf) + print(f"Patched: {fp}") + print("DocumentDB compatibility patches applied") + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: IfNotPresent + command: ["sh", "-c"] + args: + - exec lightrag-server + ports: + - name: http + containerPort: {{ .Values.env.PORT }} + protocol: TCP + 
readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 10 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: rag-storage + mountPath: /app/data/rag_storage + - name: inputs + mountPath: /app/data/inputs + - name: env-file + mountPath: /app/.env + subPath: .env + {{- $envFrom := default (dict) .Values.envFrom }} + {{- $envFromEntries := list }} + {{- range (default (list) (index $envFrom "secrets")) }} + {{- $envFromEntries = append $envFromEntries (dict "secretRef" (dict "name" .name)) }} + {{- end }} + {{- range (default (list) (index $envFrom "configmaps")) }} + {{- $envFromEntries = append $envFromEntries (dict "configMapRef" (dict "name" .name)) }} + {{- end }} + {{- if gt (len $envFromEntries) 0 }} + envFrom: +{{- toYaml $envFromEntries | nindent 12 }} + {{- end }} + {{- with .Values.image.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: env-file + secret: + secretName: {{ include "lightrag.fullname" . }}-env + {{- if .Values.persistence.enabled }} + - name: rag-storage + persistentVolumeClaim: + claimName: {{ include "lightrag.fullname" . }}-rag-storage + - name: inputs + persistentVolumeClaim: + claimName: {{ include "lightrag.fullname" . }}-inputs + {{- else }} + - name: rag-storage + emptyDir: {} + - name: inputs + emptyDir: {} + {{- end }} + strategy: + {{- toYaml .Values.updateStrategy | nindent 4 }} diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/pvc.yaml b/documentdb-playground/lightrag/helm/lightrag/templates/pvc.yaml new file mode 100644 index 00000000..83889741 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/pvc.yaml @@ -0,0 +1,27 @@ +{{- if .Values.persistence.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "lightrag.fullname" . }}-rag-storage + labels: + {{- include "lightrag.labels" . 
| nindent 4 }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.persistence.ragStorage.size }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "lightrag.fullname" . }}-inputs + labels: + {{- include "lightrag.labels" . | nindent 4 }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.persistence.inputs.size }} +{{- end }} diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/secret.yaml b/documentdb-playground/lightrag/helm/lightrag/templates/secret.yaml new file mode 100644 index 00000000..8f403ab8 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "lightrag.fullname" . }}-env + labels: + {{- include "lightrag.labels" . | nindent 4 }} +type: Opaque +stringData: + .env: |- + {{- include "lightrag.envContent" . | nindent 4 }} diff --git a/documentdb-playground/lightrag/helm/lightrag/templates/service.yaml b/documentdb-playground/lightrag/helm/lightrag/templates/service.yaml new file mode 100644 index 00000000..e5525e18 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "lightrag.fullname" . }} + labels: + {{- include "lightrag.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.env.PORT }} + protocol: TCP + name: http + selector: + {{- include "lightrag.selectorLabels" . 
| nindent 4 }} diff --git a/documentdb-playground/lightrag/helm/lightrag/values.yaml b/documentdb-playground/lightrag/helm/lightrag/values.yaml new file mode 100644 index 00000000..c7af9d43 --- /dev/null +++ b/documentdb-playground/lightrag/helm/lightrag/values.yaml @@ -0,0 +1,53 @@ +replicaCount: 1 + +image: + repository: ghcr.io/hkuds/lightrag + tag: latest + imagePullSecrets: [] + +nameOverride: "" +fullnameOverride: "" + +updateStrategy: + type: Recreate + +service: + type: ClusterIP + port: 9621 + +resources: + limits: + cpu: 1000m + memory: 2Gi + requests: + cpu: 500m + memory: 1Gi + +persistence: + enabled: true + ragStorage: + size: 5Gi + inputs: + size: 2Gi + +env: + HOST: "0.0.0.0" + PORT: "9621" + LLM_BINDING: ollama + LLM_MODEL: qwen2.5:3b + LLM_BINDING_HOST: "http://ollama.lightrag.svc.cluster.local:11434" + LLM_BINDING_API_KEY: "" + EMBEDDING_BINDING: ollama + EMBEDDING_MODEL: nomic-embed-text + EMBEDDING_DIM: "768" + EMBEDDING_BINDING_API_KEY: "" + LIGHTRAG_KV_STORAGE: MongoKVStorage + LIGHTRAG_VECTOR_STORAGE: NanoVectorDBStorage + LIGHTRAG_GRAPH_STORAGE: MongoGraphStorage + LIGHTRAG_DOC_STATUS_STORAGE: MongoDocStatusStorage + MONGO_URI: "" + MONGO_DATABASE: "LightRAG" + +envFrom: + configmaps: [] + secrets: [] diff --git a/documentdb-playground/lightrag/helm/ollama.yaml b/documentdb-playground/lightrag/helm/ollama.yaml new file mode 100644 index 00000000..d07252ad --- /dev/null +++ b/documentdb-playground/lightrag/helm/ollama.yaml @@ -0,0 +1,56 @@ +# Ollama deployment for LightRAG LLM and embedding inference. 
+# After the pod is running, pull models with: +# OLLAMA_POD=$(kubectl get pod -l app=ollama -n lightrag -o jsonpath='{.items[0].metadata.name}') +# kubectl exec -n lightrag "$OLLAMA_POD" -- ollama pull nomic-embed-text +# kubectl exec -n lightrag "$OLLAMA_POD" -- ollama pull qwen2.5:3b +apiVersion: v1 +kind: Namespace +metadata: + name: lightrag +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: lightrag +spec: + replicas: 1 + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + ports: + - containerPort: 11434 + resources: + requests: + cpu: 500m + memory: 3Gi + limits: + cpu: "4" + memory: 4Gi + volumeMounts: + - name: ollama-data + mountPath: /root/.ollama + volumes: + - name: ollama-data + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: lightrag +spec: + selector: + app: ollama + ports: + - port: 11434 + targetPort: 11434 + type: ClusterIP diff --git a/documentdb-playground/lightrag/scripts/cleanup.sh b/documentdb-playground/lightrag/scripts/cleanup.sh new file mode 100755 index 00000000..d76a87b7 --- /dev/null +++ b/documentdb-playground/lightrag/scripts/cleanup.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Remove LightRAG and Ollama from the cluster. +set -euo pipefail + +NAMESPACE="${LIGHTRAG_NAMESPACE:-lightrag}" + +echo "=== Cleaning up LightRAG deployment ===" + +echo "Uninstalling LightRAG Helm release..." +helm uninstall lightrag -n "$NAMESPACE" 2>/dev/null || true + +echo "Deleting PVCs..." +kubectl delete pvc -l app.kubernetes.io/name=lightrag -n "$NAMESPACE" 2>/dev/null || true + +echo "Deleting Ollama..." +kubectl delete deployment ollama -n "$NAMESPACE" 2>/dev/null || true +kubectl delete service ollama -n "$NAMESPACE" 2>/dev/null || true + +echo "Deleting namespace..." +kubectl delete namespace "$NAMESPACE" 2>/dev/null || true + +echo "Cleanup complete." 
diff --git a/documentdb-playground/lightrag/scripts/deploy.sh b/documentdb-playground/lightrag/scripts/deploy.sh new file mode 100755 index 00000000..7bdef243 --- /dev/null +++ b/documentdb-playground/lightrag/scripts/deploy.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Deploy LightRAG with DocumentDB backend on a Kubernetes cluster. +# Prerequisites: kubectl, helm, a running cluster with DocumentDB deployed. +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +CHART_DIR="$SCRIPT_DIR/../helm/lightrag" +VALUES_FILE="$SCRIPT_DIR/../helm/lightrag-values.yaml" +OLLAMA_MANIFEST="$SCRIPT_DIR/../helm/ollama.yaml" +NAMESPACE="${LIGHTRAG_NAMESPACE:-lightrag}" +DOCUMENTDB_NAMESPACE="${DOCUMENTDB_NAMESPACE:-documentdb-demo}" +DOCUMENTDB_CLUSTER="${DOCUMENTDB_CLUSTER:-my-cluster}" + +echo "=== LightRAG + DocumentDB Deployment ===" + +# 1. Create namespace and deploy Ollama +echo "" +echo "--- Step 1: Deploy Ollama ---" +kubectl apply -f "$OLLAMA_MANIFEST" +echo "Waiting for Ollama pod to be ready..." +kubectl wait --for=condition=Ready pod -l app=ollama -n "$NAMESPACE" --timeout=120s + +# 2. Pull models +echo "" +echo "--- Step 2: Pull LLM and embedding models ---" +OLLAMA_POD=$(kubectl get pod -l app=ollama -n "$NAMESPACE" -o jsonpath='{.items[0].metadata.name}') +echo "Pulling nomic-embed-text (embedding, ~274MB)..." +kubectl exec -n "$NAMESPACE" "$OLLAMA_POD" -- ollama pull nomic-embed-text +echo "Pulling qwen2.5:3b (LLM, ~1.9GB)..." +kubectl exec -n "$NAMESPACE" "$OLLAMA_POD" -- ollama pull qwen2.5:3b + +# 3. 
Get DocumentDB connection details +echo "" +echo "--- Step 3: DocumentDB connection ---" +SVC_NAME="documentdb-service-${DOCUMENTDB_CLUSTER}" +SVC_HOST="${SVC_NAME}.${DOCUMENTDB_NAMESPACE}.svc.cluster.local" +# Try to extract credentials from the DocumentDB secret +SECRET_NAME="${DOCUMENTDB_CLUSTER}-superuser" +if kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" &>/dev/null; then + DB_USER=$(kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" -o jsonpath='{.data.username}' | base64 -d) + DB_PASS=$(kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" -o jsonpath='{.data.password}' | base64 -d) +else + echo "Could not find secret $SECRET_NAME in namespace $DOCUMENTDB_NAMESPACE." + echo "Please set MONGO_URI in $VALUES_FILE manually." + DB_USER="admin" + DB_PASS="CHANGEME" +fi +MONGO_URI="mongodb://${DB_USER}:${DB_PASS}@${SVC_HOST}:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&tlsAllowInvalidCertificates=true" +echo "DocumentDB endpoint: ${SVC_HOST}:10260" + +# 4. Deploy LightRAG via Helm +echo "" +echo "--- Step 4: Deploy LightRAG ---" +helm upgrade --install lightrag "$CHART_DIR" \ + -n "$NAMESPACE" \ + -f "$VALUES_FILE" \ + --set "env.MONGO_URI=$MONGO_URI" \ + --wait --timeout 5m +echo "Waiting for LightRAG pod to be ready..." 
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=lightrag -n "$NAMESPACE" --timeout=300s + +echo "" +echo "=== Deployment complete ===" +echo "" +echo "Access LightRAG:" +echo " kubectl port-forward svc/lightrag -n $NAMESPACE 9621:9621" +echo " open http://localhost:9621" +echo "" +echo "Insert a document:" +echo " curl -X POST http://localhost:9621/documents/text \\" +echo " -H 'Content-Type: application/json' \\" +echo " -d '{\"text\": \"Your text here\"}'" +echo "" +echo "Query:" +echo " curl -X POST http://localhost:9621/query \\" +echo " -H 'Content-Type: application/json' \\" +echo " -d '{\"query\": \"Your question\", \"mode\": \"hybrid\"}'" From bc1c7229eb11422234b4316db64fa4c845d7b043 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 2 Apr 2026 12:52:27 -0700 Subject: [PATCH 2/4] fix: use DocumentDB status.connectionString for connection details Replace manual service/secret lookups with the connection string from the DocumentDB resource status field, matching the pattern documented in the official networking docs. The status field contains embedded kubectl commands that are resolved via eval. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- documentdb-playground/lightrag/README.md | 16 ++++++------- .../lightrag/scripts/deploy.sh | 24 +++++++++---------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/documentdb-playground/lightrag/README.md b/documentdb-playground/lightrag/README.md index 0cbe8c9b..e3a58afa 100644 --- a/documentdb-playground/lightrag/README.md +++ b/documentdb-playground/lightrag/README.md @@ -111,19 +111,17 @@ env: MONGO_DATABASE: "LightRAG" ``` -To get your connection details: +To get your connection string from the DocumentDB resource status: ```bash -# Get the gateway service -kubectl get svc -n | grep documentdb-service - -# Get credentials -kubectl get secret documentdb-credentials -n \ - -o jsonpath='{.data.username}' | base64 -d -kubectl get secret documentdb-credentials -n \ - -o jsonpath='{.data.password}' | base64 -d +# The connection string contains embedded kubectl commands for credentials. +# Use eval to resolve them into a usable URI. +CONNECTION_STRING=$(eval echo "$(kubectl get documentdb -n -o jsonpath='{.status.connectionString}')") +echo "$CONNECTION_STRING" ``` +> **Note:** The `eval` command executes shell expansions in the connection string. This is safe when the string comes from your own DocumentDB resource, but never pipe untrusted input through `eval`. + ### LLM Configuration The default configuration uses [Ollama](https://ollama.com) with `qwen2.5:3b` for text generation and `nomic-embed-text` for embeddings. To use OpenAI instead: diff --git a/documentdb-playground/lightrag/scripts/deploy.sh b/documentdb-playground/lightrag/scripts/deploy.sh index 7bdef243..edb6a14b 100755 --- a/documentdb-playground/lightrag/scripts/deploy.sh +++ b/documentdb-playground/lightrag/scripts/deploy.sh @@ -29,24 +29,22 @@ kubectl exec -n "$NAMESPACE" "$OLLAMA_POD" -- ollama pull nomic-embed-text echo "Pulling qwen2.5:3b (LLM, ~1.9GB)..." 
kubectl exec -n "$NAMESPACE" "$OLLAMA_POD" -- ollama pull qwen2.5:3b -# 3. Get DocumentDB connection details +# 3. Get DocumentDB connection string from resource status echo "" echo "--- Step 3: DocumentDB connection ---" -SVC_NAME="documentdb-service-${DOCUMENTDB_CLUSTER}" -SVC_HOST="${SVC_NAME}.${DOCUMENTDB_NAMESPACE}.svc.cluster.local" -# Try to extract credentials from the DocumentDB secret -SECRET_NAME="${DOCUMENTDB_CLUSTER}-superuser" -if kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" &>/dev/null; then - DB_USER=$(kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" -o jsonpath='{.data.username}' | base64 -d) - DB_PASS=$(kubectl get secret "$SECRET_NAME" -n "$DOCUMENTDB_NAMESPACE" -o jsonpath='{.data.password}' | base64 -d) +RAW_CONN=$(kubectl get documentdb "$DOCUMENTDB_CLUSTER" -n "$DOCUMENTDB_NAMESPACE" \ + -o jsonpath='{.status.connectionString}' 2>/dev/null) || true + +if [ -n "$RAW_CONN" ]; then + # The connection string contains embedded kubectl commands for credentials. + # eval resolves them into a usable URI. + MONGO_URI=$(eval echo "$RAW_CONN") + echo "Connection string retrieved from DocumentDB status." else - echo "Could not find secret $SECRET_NAME in namespace $DOCUMENTDB_NAMESPACE." + echo "Could not read status.connectionString from DocumentDB resource." echo "Please set MONGO_URI in $VALUES_FILE manually." - DB_USER="admin" - DB_PASS="CHANGEME" + MONGO_URI="" fi -MONGO_URI="mongodb://${DB_USER}:${DB_PASS}@${SVC_HOST}:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&tlsAllowInvalidCertificates=true" -echo "DocumentDB endpoint: ${SVC_HOST}:10260" # 4. 
Deploy LightRAG via Helm echo "" From ef93862eaa10b513120fc57056ac495bdf848c23 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 2 Apr 2026 12:56:08 -0700 Subject: [PATCH 3/4] docs: add init-container patch explanation to README Document the three patched methods, why each is needed for DocumentDB compatibility, how the init container applies the patches, and the impact on functionality. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- documentdb-playground/lightrag/README.md | 25 +++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/documentdb-playground/lightrag/README.md b/documentdb-playground/lightrag/README.md index e3a58afa..57169191 100644 --- a/documentdb-playground/lightrag/README.md +++ b/documentdb-playground/lightrag/README.md @@ -161,7 +161,30 @@ LightRAG's MongoDB storage assumes MongoDB Atlas features. This playground inclu | Basic CRUD operations | ✅ | ✅ | Works natively | | Aggregation pipelines | ✅ | ✅ | `$group`, `$match`, `$sort` work | -The init container applies these patches automatically — no manual configuration is needed. +### How the Init-Container Patches Work + +The Helm chart's `deployment.yaml` includes an init container (`patch-for-documentdb`) that runs before the main LightRAG container starts. It modifies LightRAG's MongoDB storage layer in-place to skip operations that are incompatible with DocumentDB. + +**What gets patched:** + +Three async methods in `lightrag/kg/mongo_impl.py` are stubbed out with an early `return`: + +| Method | Why it's patched | +|---|---| +| `create_and_migrate_indexes_if_not_exists` | Calls `createIndex` with collation and secondary indexes. DocumentDB rejects collation (`"not implemented yet"`) and hangs indefinitely on secondary index creation. | +| `create_search_index_if_not_exists` | Calls `$listSearchIndexes` which DocumentDB doesn't support. While LightRAG catches the `PyMongoError` gracefully, skipping it avoids unnecessary error logs. 
| +| `create_vector_index_if_not_exists` | Creates Atlas `$vectorSearch` indexes. Not applicable because this playground uses `NanoVectorDBStorage` (local) instead of `MongoVectorDBStorage`. | + +**How it works:** + +1. The init container shares the same image as the main LightRAG container. +2. A Python script inserts `return` statements at the top of each method, effectively making them no-ops. +3. Both code locations are patched — `/app/lightrag/` (dev install) and `/app/.venv/lib/python3.12/site-packages/lightrag/` (venv install) — because the LightRAG Docker image includes two copies. +4. Bytecode caches (`__pycache__/mongo_impl*.pyc`) are cleared to prevent stale compiled code from being loaded. + +**Impact:** LightRAG operates normally without indexes. All CRUD, aggregation, and graph traversal operations work correctly. The only trade-off is that queries on large datasets may be slower without secondary indexes, which is acceptable for a playground. + +The patches are applied automatically — no manual configuration is needed. 
## Verified Operations From 23d4fc7d3cc2bb605be61d6f705fce1ef1cc7514 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 2 Apr 2026 15:21:57 -0700 Subject: [PATCH 4/4] fix: fix eval quoting and add DNS resolution in deploy script Findings from AKS E2E testing of the KEDA playground apply here too: - Fix eval quoting: use eval "echo \"...\"" to prevent & in connection string query params from being interpreted as shell background operator - Replace ClusterIP with DNS name for cross-namespace service resolution (status.connectionString uses ClusterIP) - Update README eval example to use the corrected two-step pattern Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- documentdb-playground/lightrag/README.md | 3 ++- documentdb-playground/lightrag/scripts/deploy.sh | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/documentdb-playground/lightrag/README.md b/documentdb-playground/lightrag/README.md index 57169191..093a460b 100644 --- a/documentdb-playground/lightrag/README.md +++ b/documentdb-playground/lightrag/README.md @@ -116,7 +116,8 @@ To get your connection string from the DocumentDB resource status: ```bash # The connection string contains embedded kubectl commands for credentials. # Use eval to resolve them into a usable URI. 
-CONNECTION_STRING=$(eval echo "$(kubectl get documentdb <name> -n <namespace> -o jsonpath='{.status.connectionString}')") +RAW_CONN=$(kubectl get documentdb <name> -n <namespace> -o jsonpath='{.status.connectionString}') +CONNECTION_STRING=$(eval "echo \"$RAW_CONN\"") echo "$CONNECTION_STRING" ``` diff --git a/documentdb-playground/lightrag/scripts/deploy.sh b/documentdb-playground/lightrag/scripts/deploy.sh index edb6a14b..3399b2c6 100755 --- a/documentdb-playground/lightrag/scripts/deploy.sh +++ b/documentdb-playground/lightrag/scripts/deploy.sh @@ -37,8 +37,17 @@ RAW_CONN=$(kubectl get documentdb "$DOCUMENTDB_CLUSTER" -n "$DOCUMENTDB_NAMESPAC if [ -n "$RAW_CONN" ]; then # The connection string contains embedded kubectl commands for credentials. - # eval resolves them into a usable URI. - MONGO_URI=$(eval echo "$RAW_CONN") + # eval resolves them into a usable URI. The inner quoting prevents & from + # being interpreted as a shell background operator. + MONGO_URI=$(eval "echo \"$RAW_CONN\"") + + # Replace ClusterIP with DNS name for cross-namespace resolution. + SVC_IP=$(kubectl get svc "documentdb-service-${DOCUMENTDB_CLUSTER}" -n "$DOCUMENTDB_NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null) || true + if [ -n "$SVC_IP" ]; then + SVC_DNS="documentdb-service-${DOCUMENTDB_CLUSTER}.${DOCUMENTDB_NAMESPACE}.svc.cluster.local" + MONGO_URI=$(echo "$MONGO_URI" | sed "s/$SVC_IP/$SVC_DNS/g") + fi + echo "Connection string retrieved from DocumentDB status." else echo "Could not read status.connectionString from DocumentDB resource."