Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .app_env_example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ source POSTGRES_PASSWORD=postgres

source APP_ROOT = hopper/

# MCP Knowledge Base Configuration
source KB_MCP_HOST=http://host.docker.internal:8002
source KB_MCP_JWT_TOKEN=your-jwt-token # JWT token from OIDC token endpoint

# JWT config
source IDP_OIDC_PRIVATE_KEY="-----BEGIN PRIVATE KEY----- \
<paste key here> \
Expand Down
30 changes: 30 additions & 0 deletions hospexplorer/ask/kb_connector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging

import httpx
from django.conf import settings

logger = logging.getLogger(__name__)


def list_kb_documents(page=1, page_size=10):
    """Fetch a single page of documents from the MCP KB ``docs/list`` endpoint.

    Args:
        page: Page number forwarded as the ``page`` query parameter.
        page_size: Page size forwarded as the ``page_size`` query parameter.

    Returns:
        The decoded JSON body of the response. Documented response format:
        {total, page, page_size, documents: [{id, title, url, chunks: [...]}]}

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    # The configured JWT is sent as a bearer token on every request.
    auth_headers = {
        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
        "Content-Type": "application/json",
    }
    endpoint = f"{settings.KB_MCP_HOST}/docs/list"
    query = {"page": page, "page_size": page_size}

    with httpx.Client() as http:
        kb_response = http.get(
            endpoint,
            headers=auth_headers,
            params=query,
            timeout=settings.KB_MCP_TIMEOUT,
        )

    # Surface error statuses to the caller instead of returning an error body.
    kb_response.raise_for_status()
    return kb_response.json()
10 changes: 10 additions & 0 deletions hospexplorer/ask/templates/_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@
</div>
</div>
{% if user.is_authenticated %}
<div class="border-top p-2">
<a href="{% url 'ask:kb-resources' %}" class="btn btn-outline-secondary btn-sm w-100 text-start">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1 mb-1" viewBox="0 0 16 16">
<path d="M1 2.828c.885-.37 2.154-.769 3.388-.893 1.33-.134 2.458.063 3.112.752v9.746c-.935-.53-2.12-.603-3.213-.493-1.18.12-2.37.461-3.287.811V2.828zm7.5-.141c.654-.689 1.782-.886 3.112-.752 1.234.124 2.503.523 3.388.893v9.923c-.918-.35-2.107-.692-3.287-.81-1.094-.111-2.278-.039-3.213.492V2.687zM8 1.783C7.015.936 5.587.81 4.287.94c-1.514.153-3.042.672-3.994 1.105A.5.5 0 0 0 0 2.5v11a.5.5 0 0 0 .707.455c.882-.4 2.303-.881 3.68-1.02 1.409-.142 2.59.087 3.223.877a.5.5 0 0 0 .78 0c.633-.79 1.814-1.019 3.222-.877 1.378.139 2.8.62 3.681 1.02A.5.5 0 0 0 16 13.5v-11a.5.5 0 0 0-.293-.455c-.952-.433-2.48-.952-3.994-1.105C10.413.809 8.985.936 8 1.783z"/>
</svg>
Knowledge Base
</a>
</div>
{% endif %}
{% if user.is_authenticated %}
<!-- User profile at bottom -->
<div class="border-top p-3 mt-auto dropup">
<a href="#" class="d-flex align-items-center text-decoration-none"
Expand Down
187 changes: 187 additions & 0 deletions hospexplorer/ask/templates/kb/resources.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
{% extends "_base.html" %}

{% block content %}
<div class="mt-4 mb-4" x-data="kbSync()">
<a href="{% url 'ask:index' %}" class="text-decoration-none mb-3 d-inline-block" style="color: rgb(197, 66, 130);">
&larr; Back to Chat
</a>
<div class="d-flex justify-content-between align-items-center mb-3">
<h2 class="mb-0">Knowledge Base Resources</h2>
<button @click="sync()" :disabled="syncing" class="btn text-white" style="background-color: #8C1D40;">
<span x-show="!syncing">Sync with Knowledge Base</span>
<span x-show="syncing">
<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span>
Syncing...
</span>
</button>
</div>

<!-- Sync error -->
<div x-show="syncError" x-cloak class="alert alert-danger alert-dismissible fade show">
<span x-text="syncError"></span>
<button type="button" class="btn-close" @click="syncError = null"></button>
</div>

<!-- Sync summary -->
<div x-show="syncResult" x-cloak class="alert alert-info alert-dismissible fade show">
<strong>Sync Complete:</strong>
<span x-text="syncSummary"></span>
<button type="button" class="btn-close" @click="syncResult = null"></button>
</div>

<!-- Resources table -->
<div class="card">
<div class="table-responsive">
<table class="table table-hover mb-0">
<thead class="table-light">
<tr>
<th>Title</th>
<th>URL</th>
<th>Last Included</th>
<th>KB Status</th>
</tr>
</thead>
<tbody>
{% for resource in page_obj %}
<tr :class="getRowClass({{ resource.id }})">
<td>{{ resource.title }}</td>
<td>
<a href="{{ resource.url }}" target="_blank" rel="noopener" class="text-truncate d-inline-block" style="max-width: 300px;">
{{ resource.url|truncatechars:60 }}
</a>
</td>
<td>{{ resource.modified_at|date:"N j, Y, P" }}</td>
<td>
<span x-text="getStatus({{ resource.id }})">--</span>
</td>
</tr>
{% empty %}
<tr>
<td colspan="4" class="text-muted text-center py-4">No resources found in the internal database.</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>

<!-- Untracked KB documents -->
<div x-show="untracked.length > 0" x-cloak class="mt-4">
<h5>Documents in KB but not tracked internally</h5>
<div class="card">
<div class="table-responsive">
<table class="table table-hover mb-0">
<thead class="table-light">
<tr>
<th>Title</th>
<th>URL</th>
</tr>
</thead>
<tbody>
<template x-for="doc in untracked" :key="doc.url">
<tr class="table-info">
<td x-text="doc.title"></td>
<td>
<a :href="doc.url" target="_blank" rel="noopener" x-text="doc.url" class="text-truncate d-inline-block" style="max-width: 400px;"></a>
</td>
</tr>
</template>
</tbody>
</table>
</div>
</div>
</div>

<!-- Pagination -->
{% if page_obj.has_other_pages %}
<nav class="mt-3">
<ul class="pagination justify-content-center">
{% if page_obj.has_previous %}
<li class="page-item">
<a class="page-link" href="?page={{ page_obj.previous_page_number }}">Previous</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link">Previous</span></li>
{% endif %}

{% for num in page_obj.paginator.page_range %}
<li class="page-item {% if page_obj.number == num %}active{% endif %}">
<a class="page-link" href="?page={{ num }}">{{ num }}</a>
</li>
{% endfor %}

{% if page_obj.has_next %}
<li class="page-item">
<a class="page-link" href="?page={{ page_obj.next_page_number }}">Next</a>
</li>
{% else %}
<li class="page-item disabled"><span class="page-link">Next</span></li>
{% endif %}
</ul>
</nav>
{% endif %}
</div>

<script>
// Alpine.js component backing the Knowledge Base resources page (used via x-data="kbSync()").
function kbSync() {
    return {
        // True while a sync request is in flight; disables the button and shows the spinner.
        syncing: false,
        // Last successful sync payload, or null when none is shown.
        syncResult: null,
        // Error message to display, or null.
        syncError: null,
        // Maps internal resource id -> status string reported by the sync endpoint.
        statusMap: {},
        // KB documents that have no matching internal resource.
        untracked: [],
        // Human-readable summary of the last successful sync.
        syncSummary: '',

        // POST to the sync endpoint and refresh all component state from the reply.
        async sync() {
            this.syncing = true;
            this.syncError = null;
            this.syncResult = null;
            this.untracked = [];
            try {
                const reply = await fetch('{% url "ask:kb-sync" %}', {
                    method: 'POST',
                    headers: { 'X-CSRFToken': '{{ csrf_token }}' },
                });
                const payload = await reply.json();
                if (!payload.success) {
                    this.syncError = payload.error || 'Sync failed.';
                } else {
                    this.syncResult = payload;
                    this.statusMap = {};
                    for (const entry of payload.resources) {
                        this.statusMap[entry.id] = entry.status;
                    }
                    this.untracked = payload.untracked || [];

                    const countWithStatus = (wanted) =>
                        payload.resources.filter((entry) => entry.status === wanted).length;
                    const syncedCount = countWithStatus('in_kb');
                    const missingCount = countWithStatus('missing_from_kb');
                    this.syncSummary = `${syncedCount} in sync, ${missingCount} missing from KB, ${this.untracked.length} in KB but untracked. (${payload.kb_total} KB documents total)`;
                }
            } catch (err) {
                // Network failures and non-JSON replies both end up here.
                this.syncError = 'Failed to connect. Please try again.';
            } finally {
                this.syncing = false;
            }
        },

        // Label for the "KB Status" column of the given resource row.
        getStatus(id) {
            if (!this.syncResult) return '--';
            switch (this.statusMap[id]) {
                case 'in_kb':
                    return 'In Sync';
                case 'missing_from_kb':
                    return 'Missing from KB';
                default:
                    return '--';
            }
        },

        // Bootstrap table row class reflecting the sync status of the given resource.
        getRowClass(id) {
            if (!this.syncResult) return '';
            switch (this.statusMap[id]) {
                case 'in_kb':
                    return 'table-success';
                case 'missing_from_kb':
                    return 'table-warning';
                default:
                    return '';
            }
        }
    };
}
</script>

<style>
[x-cloak] { display: none !important; }
</style>
{% endblock %}
2 changes: 2 additions & 0 deletions hospexplorer/ask/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@
re_path(r"^terms/$", views.terms_view, name="terms-view"),
re_path(r"^terms/accept/$", views.terms_accept, name="terms-accept"),
re_path(r"^history/delete$", views.delete_history, name="delete-history"),
re_path(r"^kb/$", views.kb_resources, name="kb-resources"),
re_path(r"^kb/sync/$", views.kb_sync, name="kb-sync"),
]
93 changes: 92 additions & 1 deletion hospexplorer/ask/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
from django.contrib.auth.decorators import login_required
from django.views.decorators.http import require_GET, require_POST, require_http_methods

from ask.models import Conversation, QARecord, QueryTask, TermsAcceptance
import httpx
from django.core.paginator import Paginator

from ask.models import Conversation, QARecord, QueryTask, TermsAcceptance, WebsiteResource
from ask.tasks import run_llm_task
from ask.kb_connector import list_kb_documents

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -176,3 +180,90 @@ def terms_view(request):
def delete_history(request):
request.user.conversations.all().delete()
return JsonResponse({"message": "All conversations deleted successfully!"})


@login_required
def kb_resources(request):
    """Render one page of the internally stored Knowledge Base resources.

    Resources are listed newest-modified first. The page is selected with the
    ``page`` query parameter (defaults to the first page) and the page size
    comes from ``settings.KB_RESOURCES_PAGE_SIZE``.
    """
    paginator = Paginator(
        WebsiteResource.objects.all().order_by("-modified_at"),
        settings.KB_RESOURCES_PAGE_SIZE,
    )
    requested_page = request.GET.get("page", 1)
    context = {"page_obj": paginator.get_page(requested_page)}
    return render(request, "kb/resources.html", context)


def _fetch_all_kb_documents(page_size=50):
    """Collect every document from the MCP KB by walking GET /docs/list page by page."""
    kb_docs = []
    page = 1
    while True:
        data = list_kb_documents(page=page, page_size=page_size)
        documents = data.get("documents") or []
        kb_docs.extend(documents)
        # An empty page also ends the walk: if the reported total is larger
        # than what the server actually returns, checking the total alone
        # would keep requesting empty pages forever.
        if not documents or len(kb_docs) >= data.get("total", 0):
            break
        page += 1
    return kb_docs


@login_required
@require_POST
def kb_sync(request):
    """Compare internal WebsiteResource records with MCP KB documents.

    How it works:
    1. Paginates through ALL documents from the MCP KB server via GET /docs/list
       (each doc has: id, title, url, chunks). Collects all KB doc URLs into a set.
       Pagination stops once the reported total is reached or a page comes back empty.
    2. Iterates over all internal WebsiteResource records from Django's DB.
       Compares each resource's URL against the KB URL set:
       - "in_kb": resource URL exists in KB — the resource is indexed
       - "missing_from_kb": resource URL NOT in KB — needs to be added/re-ingested
    3. Also finds "untracked" docs: URLs present in the KB but not in the
       internal WebsiteResource table (added to KB outside of this app).
       Each untracked URL is reported once, even if the KB lists it several times.

    Returns JSON to the frontend:
    - resources: list of {id, url, title, status} for each internal resource
    - untracked: list of {url, title} for KB docs not tracked internally
    - kb_total: total documents in the KB
    - internal_total: total WebsiteResource records in Django DB

    On failure the JSON body is {success: False, error: <message>} with status
    504 (KB request timed out), 503 (could not connect), 502 (KB returned an
    error status) or 500 (anything else, which is also logged).
    """
    try:
        kb_docs = _fetch_all_kb_documents()
        kb_urls = {doc["url"] for doc in kb_docs if doc.get("url")}

        # Compare each internal WebsiteResource URL against the KB URLs.
        results = [
            {
                "id": resource.id,
                "url": resource.url,
                "title": resource.title,
                "status": "in_kb" if resource.url in kb_urls else "missing_from_kb",
            }
            for resource in WebsiteResource.objects.all()
        ]
        internal_urls = {entry["url"] for entry in results}

        # The frontend keys the untracked rows by URL, so report each URL once.
        untracked = []
        reported_urls = set()
        for doc in kb_docs:
            url = doc.get("url")
            if not url or url in internal_urls or url in reported_urls:
                continue
            reported_urls.add(url)
            # A KB document without a title must not abort the whole sync.
            untracked.append({"url": url, "title": doc.get("title", "")})

        return JsonResponse({
            "success": True,
            "resources": results,
            "untracked": untracked,
            "kb_total": len(kb_docs),
            "internal_total": len(results),
        })
    except httpx.TimeoutException:
        # Timeouts are not ConnectError subclasses; report them explicitly
        # instead of letting them fall through to the generic 500 handler.
        return JsonResponse({
            "success": False,
            "error": "The Knowledge Base server took too long to respond.",
        }, status=504)
    except httpx.ConnectError:
        return JsonResponse({
            "success": False,
            "error": "Could not connect to the Knowledge Base server.",
        }, status=503)
    except httpx.HTTPStatusError as e:
        return JsonResponse({
            "success": False,
            "error": f"Knowledge Base server returned an error (HTTP {e.response.status_code}).",
        }, status=502)
    except Exception:
        logger.exception("KB sync failed")
        return JsonResponse({
            "success": False,
            "error": "An unexpected error occurred during sync.",
        }, status=500)
8 changes: 8 additions & 0 deletions hospexplorer/hospexplorer/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@
LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", 120))
LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096"))

# MCP Knowledge Base Configuration
# KB_MCP_HOST: base URL of the MCP KB server; the connector appends "/docs/list" to it.
# KB_MCP_JWT_TOKEN: sent as the Bearer token in the Authorization header (empty by default).
# KB_MCP_TIMEOUT: timeout passed to httpx for each KB request (presumably seconds, per httpx convention).
KB_MCP_HOST = os.getenv("KB_MCP_HOST", "http://localhost:8002")
KB_MCP_JWT_TOKEN = os.getenv("KB_MCP_JWT_TOKEN", "")
KB_MCP_TIMEOUT = int(os.getenv("KB_MCP_TIMEOUT", 30))

# Number of internal resources shown per page on the KB resources view
KB_RESOURCES_PAGE_SIZE = int(os.getenv("KB_RESOURCES_PAGE_SIZE", 20))

# Sidebar conversations limit
SIDEBAR_CONVERSATIONS_LIMIT = int(os.getenv("SIDEBAR_CONVERSATIONS_LIMIT", 10))

Expand Down