diff --git a/.app_env_example b/.app_env_example
index c552d51..3fd3e06 100644
--- a/.app_env_example
+++ b/.app_env_example
@@ -13,6 +13,10 @@
 source POSTGRES_PASSWORD=postgres
 source APP_ROOT = hopper/
 
+# MCP Knowledge Base Configuration
+source KB_MCP_HOST=http://host.docker.internal:8002
+source KB_MCP_JWT_TOKEN=your-jwt-token # JWT token from OIDC token endpoint
+
 # JWT config
 source IDP_OIDC_PRIVATE_KEY="-----BEGIN PRIVATE KEY----- \
 \
diff --git a/hospexplorer/ask/kb_connector.py b/hospexplorer/ask/kb_connector.py
new file mode 100644
index 0000000..2e610a4
--- /dev/null
+++ b/hospexplorer/ask/kb_connector.py
@@ -0,0 +1,43 @@
+import logging
+
+import httpx
+from django.conf import settings
+
+logger = logging.getLogger(__name__)
+
+
+def list_kb_documents(page=1, page_size=10):
+    """Call MCP KB docs/list endpoint. Returns parsed JSON response.
+
+    Args:
+        page: Page number to request.
+        page_size: Number of documents to request per page.
+
+    Returns:
+        The parsed JSON response body. Response format:
+        {total, page, page_size, documents: [{id, title, url, chunks: [...]}]}
+
+    Raises:
+        httpx.HTTPStatusError: If the server responds with a 4xx/5xx status.
+        httpx.RequestError: If the request itself fails (connection, timeout).
+    """
+    headers = {
+        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
+        "Content-Type": "application/json",
+    }
+    # Strip a trailing slash so a host configured as "http://kb:8002/" does
+    # not produce a "//docs/list" path.
+    base_url = settings.KB_MCP_HOST.rstrip("/")
+    url = f"{base_url}/docs/list"
+    params = {"page": page, "page_size": page_size}
+
+    with httpx.Client() as client:
+        response = client.get(
+            url,
+            headers=headers,
+            params=params,
+            timeout=settings.KB_MCP_TIMEOUT,
+        )
+
+    response.raise_for_status()
+    return response.json()
diff --git a/hospexplorer/ask/templates/_base.html b/hospexplorer/ask/templates/_base.html
index c758c3f..5220741 100644
--- a/hospexplorer/ask/templates/_base.html
+++ b/hospexplorer/ask/templates/_base.html
@@ -53,6 +53,16 @@
 {% if user.is_authenticated %}
+
+ + + + + Knowledge Base + +
+ {% endif %} + {% if user.is_authenticated %}
+ + ← Back to Chat + +
+

Knowledge Base Resources

+ +
+ + +
+ + +
+ + +
+ Sync Complete: + + +
+ + +
+
+ + + + + + + + + + + {% for resource in page_obj %} + + + + + + + {% empty %} + + + + {% endfor %} + +
TitleURLLast IncludedKB Status
{{ resource.title }} + + {{ resource.url|truncatechars:60 }} + + {{ resource.modified_at|date:"N j, Y, P" }} + -- +
No resources found in the internal database.
+
+
+ + +
+
Documents in KB but not tracked internally
+
+
+ + + + + + + + + + +
TitleURL
+
+
+
+ + + {% if page_obj.has_other_pages %} + + {% endif %} +
+
+
+
+
+{% endblock %}
diff --git a/hospexplorer/ask/urls.py b/hospexplorer/ask/urls.py
index 105b586..29bd697 100644
--- a/hospexplorer/ask/urls.py
+++ b/hospexplorer/ask/urls.py
@@ -13,4 +13,6 @@
     re_path(r"^terms/$", views.terms_view, name="terms-view"),
     re_path(r"^terms/accept/$", views.terms_accept, name="terms-accept"),
     re_path(r"^history/delete$", views.delete_history, name="delete-history"),
+    re_path(r"^kb/$", views.kb_resources, name="kb-resources"),
+    re_path(r"^kb/sync/$", views.kb_sync, name="kb-sync"),
 ]
diff --git a/hospexplorer/ask/views.py b/hospexplorer/ask/views.py
index 8a9eb31..d1a549c 100644
--- a/hospexplorer/ask/views.py
+++ b/hospexplorer/ask/views.py
@@ -8,8 +8,12 @@
 from django.contrib.auth.decorators import login_required
 from django.views.decorators.http import require_GET, require_POST, require_http_methods
 
-from ask.models import Conversation, QARecord, QueryTask, TermsAcceptance
+import httpx
+from django.core.paginator import Paginator
+
+from ask.models import Conversation, QARecord, QueryTask, TermsAcceptance, WebsiteResource
 from ask.tasks import run_llm_task
+from ask.kb_connector import list_kb_documents
 
 logger = logging.getLogger(__name__)
 
@@ -176,3 +180,106 @@ def terms_view(request):
 def delete_history(request):
     request.user.conversations.all().delete()
     return JsonResponse({"message": "All conversations deleted successfully!"})
+
+
+@login_required
+def kb_resources(request):
+    """Display paginated list of Knowledge Base resources from internal DB."""
+    resources = WebsiteResource.objects.all().order_by("-modified_at")
+    paginator = Paginator(resources, settings.KB_RESOURCES_PAGE_SIZE)
+    page_number = request.GET.get("page", 1)
+    page_obj = paginator.get_page(page_number)
+    return render(request, "kb/resources.html", {"page_obj": page_obj})
+
+
+@login_required
+@require_POST
+def kb_sync(request):
+    """Compare internal WebsiteResource records with MCP KB documents.
+
+    How it works:
+    1. Paginates through ALL documents from the MCP KB server via GET /docs/list
+       (each doc has: id, title, url, chunks). Collects all KB doc URLs into a set.
+    2. Iterates over all internal WebsiteResource records from Django's DB.
+       Compares each resource's URL against the KB URL set:
+       - "in_kb": resource URL exists in KB — the resource is indexed
+       - "missing_from_kb": resource URL NOT in KB — needs to be added/re-ingested
+    3. Also finds "untracked" docs: URLs present in the KB but not in the
+       internal WebsiteResource table (added to KB outside of this app).
+
+    Returns JSON to the frontend:
+    - resources: list of {id, url, title, status} for each internal resource
+    - untracked: list of {url, title} for KB docs not tracked internally
+    - kb_total: total documents in the KB
+    - internal_total: total WebsiteResource records in Django DB
+
+    On failure returns {success: False, error: <message>} with status 503
+    (connection failed), 504 (request timed out), 502 (KB server returned an
+    HTTP error) or 500 (anything else).
+    """
+    try:
+        kb_docs = []
+        page = 1
+        while True:
+            data = list_kb_documents(page=page, page_size=50)
+            documents = data.get("documents", [])
+            kb_docs.extend(documents)
+            # Also stop on an empty page: if the reported total is larger than
+            # what the server actually returns, checking the total alone would
+            # loop forever.
+            if not documents or len(kb_docs) >= data.get("total", 0):
+                break
+            page += 1
+
+        kb_urls = {doc["url"] for doc in kb_docs if doc.get("url")}
+
+        # compare each internal WebsiteResource urls against KB URLs
+        internal_resources = WebsiteResource.objects.all()
+        results = []
+        internal_urls = set()
+        for resource in internal_resources:
+            internal_urls.add(resource.url)
+            results.append({
+                "id": resource.id,
+                "url": resource.url,
+                "title": resource.title,
+                "status": "in_kb" if resource.url in kb_urls else "missing_from_kb",
+            })
+
+        # .get(): a KB document without a title must not abort the whole sync.
+        untracked = [
+            {"url": doc["url"], "title": doc.get("title", "")}
+            for doc in kb_docs
+            if doc.get("url") and doc["url"] not in internal_urls
+        ]
+
+        return JsonResponse({
+            "success": True,
+            "resources": results,
+            "untracked": untracked,
+            "kb_total": len(kb_docs),
+            "internal_total": len(results),
+        })
+    except httpx.ConnectError:
+        return JsonResponse({
+            "success": False,
+            "error": "Could not connect to the Knowledge Base server.",
+        }, status=503)
+    except httpx.TimeoutException:
+        # Timeouts are not ConnectErrors; report them explicitly instead of
+        # letting them fall through to the generic 500 handler.
+        return JsonResponse({
+            "success": False,
+            "error": "The Knowledge Base server took too long to respond.",
+        }, status=504)
+    except httpx.HTTPStatusError as e:
+        return JsonResponse({
+            "success": False,
+            "error": f"Knowledge Base server returned an error (HTTP {e.response.status_code}).",
+        }, status=502)
+    except Exception:
+        logger.exception("KB sync failed")
+        return JsonResponse({
+            "success": False,
+            "error": "An unexpected error occurred during sync.",
+        }, status=500)
diff --git a/hospexplorer/hospexplorer/settings.py b/hospexplorer/hospexplorer/settings.py
index a988488..986ea53 100644
--- a/hospexplorer/hospexplorer/settings.py
+++ b/hospexplorer/hospexplorer/settings.py
@@ -161,6 +161,14 @@
 LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", 120))
 LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096"))
 
+# MCP Knowledge Base Configuration
+KB_MCP_HOST = os.getenv("KB_MCP_HOST", "http://localhost:8002")
+KB_MCP_JWT_TOKEN = os.getenv("KB_MCP_JWT_TOKEN", "")
+KB_MCP_TIMEOUT = int(os.getenv("KB_MCP_TIMEOUT", 30))
+
+# Number of Knowledge Base resources shown per page
+KB_RESOURCES_PAGE_SIZE = int(os.getenv("KB_RESOURCES_PAGE_SIZE", 20))
+
 # Sidebar conversations limit
 SIDEBAR_CONVERSATIONS_LIMIT = int(os.getenv("SIDEBAR_CONVERSATIONS_LIMIT", 10))
 