diging · Girik1105 · May 11, 2026 · May 12, 2026 · May 19, 2026 · May 20, 2026
diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py
@@ -296,7 +296,7 @@ class WebsiteResourceAdmin(KBDeleteAdminMixin, admin.ModelAdmin):
         (None, {"fields": ("title", "description", "url")}),
         ("Metadata", {"fields": (
             "date_published",
-            "document_type", "document_author_institution", "institution_type", "publisher"
+            "document_type", "document_author_institution", "institution_type", "publisher",
         )}),
         ("Status", {"fields": (
             "status", "status_message", "mcp_kb_document_id",
@@ -396,6 +396,7 @@ def _apply_zip_csv_metadata(obj, row):
     if publisher_raw:
         obj.publisher = publisher_raw
 
+
     for column, model in ZIP_CSV_LOOKUP_COLUMNS.items():
         value = (row.get(column) or "").strip()
         if not value:
@@ -503,6 +504,9 @@ def zip_upload_view(self, request):
                 messages.error(request, "Please select a zip file to upload.")
                 return HttpResponseRedirect(request.path)
 
+            update_file = bool(request.POST.get("update_file"))
+            update_metadata = bool(request.POST.get("update_metadata"))
+
             try:
                 archive = zipfile.ZipFile(zip_file)
             except zipfile.BadZipFile:
@@ -554,7 +558,9 @@ def _is_real(name):
 
                 total = 0
                 saved = 0
-                queued_ids = []
+                updated = 0
+                queued_new_ids = []
+                queued_replace_ids = []
                 for row in reader:
                     total += 1
                     filename = (row.get(filename_col) or "").strip()
@@ -567,19 +573,45 @@ def _is_real(name):
                         continue
 
                     basename = os.path.basename(filename)
-                    if (basename, title) in existing_pdfs:
-                        messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.")
-                        continue
+                    is_update = (basename, title) in existing_pdfs
 
-                    member = zip_members.get(filename) or zip_members.get(basename)
-                    if not member:
-                        messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.")
+                    if is_update and not (update_file or update_metadata):
+                        messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.")
                         continue
 
-                    try:
-                        pdf_bytes = archive.read(member)
-                    except KeyError:
-                        messages.warning(request, f"Row {total}: could not read '{filename}'; skipped.")
+                    # only read PDF bytes when we'll actually use them: new rows
+                    # always need them; existing rows only when update_file is set
+                    pdf_bytes = None
+                    if (not is_update) or update_file:
+                        member = zip_members.get(filename) or zip_members.get(basename)
+                        if not member:
+                            messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.")
+                            continue
+                        try:
+                            pdf_bytes = archive.read(member)
+                        except KeyError:
+                            messages.warning(request, f"Row {total}: could not read '{filename}'; skipped.")
+                            continue
+
+                    if is_update:
+                        match = PDFResource.objects.filter(
+                            original_filename=basename, title=title
+                        ).first()
+                        if match is None:
+                            messages.warning(request, f"Row {total}: '{filename}' lookup failed; skipped.")
+                            continue
+                        if update_metadata:
+                            for warning in _apply_zip_csv_metadata(match, row):
+                                messages.warning(request, f"Row {total}: {warning}")
+                        if update_file:
+                            match.file.delete(save=False)
+                            match.file.save(basename, ContentFile(pdf_bytes), save=False)
+                        match.modifier = request.user
+                        match.status = PDFResource.Status.PROCESSING
+                        match.status_message = "Queued for Knowledge Base re-upload."
+                        match.save()
+                        updated += 1
+                        queued_replace_ids.append(match.pk)
                         continue
 
                     obj = PDFResource(
@@ -595,23 +627,34 @@ def _is_real(name):
                     obj.file.save(basename, ContentFile(pdf_bytes), save=True)
                     saved += 1
                     existing_pdfs.add((basename, title))
-                    queued_ids.append(obj.pk)
+                    queued_new_ids.append(obj.pk)
 
                 # fire KB uploads after the request transaction commits so background
                 # threads see the just-saved rows
-                def _start_uploads(ids=tuple(queued_ids)):
-                    for pk in ids:
+                def _start_uploads(
+                    new_ids=tuple(queued_new_ids),
+                    replace_ids=tuple(queued_replace_ids),
+                ):
+                    for pk in new_ids:
+                        threading.Thread(
+                            target=run_kb_resource_upload,
+                            args=("pdf", pk),
+                            daemon=True,
+                        ).start()
+                    for pk in replace_ids:
                         threading.Thread(
                             target=run_kb_resource_upload,
                             args=("pdf", pk),
+                            kwargs={"replace": True},
                             daemon=True,
                         ).start()
                 transaction.on_commit(_start_uploads)
 
                 messages.success(
                     request,
-                    f"Imported {saved} of {total} PDFs. Knowledge Base uploads are running in the "
-                    "background — refresh the list to see each row's final status.",
+                    f"Imported {saved} new and updated {updated} of {total} PDF rows. "
+                    "Knowledge Base uploads are running in the background — "
+                    "refresh the list to see each row's final status.",
                 )
                 return HttpResponseRedirect(changelist_url)
 

diff --git a/hospexplorer/ask/migrations/0014_merge_20260522_2257.py b/hospexplorer/ask/migrations/0014_merge_20260522_2257.py
@@ -0,0 +1,14 @@
+# Generated by Django 6.0.2 on 2026-05-22 22:57
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Merge migration: joins the two parallel 0013 branches; applies no operations."""
+    dependencies = [
+        ('ask', '0013_documentauthorinstitution_documenttype_and_more'),
+        ('ask', '0013_pdfresource_original_filename'),
+    ]
+
+    operations = [
+    ]
diff --git a/hospexplorer/ask/migrations/0015_iso_date_published.py b/hospexplorer/ask/migrations/0015_iso_date_published.py
@@ -0,0 +1,65 @@
+"""Switch date_published from (DateField + precision enum) to a single
+CharField holding a partial ISO 8601 date string.
+
+A plain RemoveField plus an AlterField wouldn't carry the precision
+information across, so this migration: adds a temporary CharField, copies
+each row's old (date, precision) pair into a partial ISO string, removes
+the old fields, then renames the temp field to the canonical name.
+"""
+from django.db import migrations, models
+
+
+def _to_iso_partial(date, precision):  # -> "", "YYYY", "YYYY-MM", or date.isoformat()
+    if date is None:
+        return ""  # no stored date maps to the empty string
+    if precision == "year":
+        return f"{date.year:04d}"  # zero-padded four-digit year only
+    if precision == "month":
+        return f"{date.year:04d}-{date.month:02d}"  # year and zero-padded month
+    # "day" — or any other / empty precision — is treated as a full date
+    return date.isoformat()
+
+
+def forwards(apps, schema_editor):  # RunPython forward step: backfill date_published_iso
+    for model_name in ("PDFResource", "WebsiteResource"):
+        Model = apps.get_model("ask", model_name)  # model looked up via the migration's app registry
+        for obj in Model.objects.all():
+            obj.date_published_iso = _to_iso_partial(
+                obj.date_published, obj.date_published_precision
+            )
+            obj.save(update_fields=["date_published_iso"])  # persist only the backfilled field
+
+
+class Migration(migrations.Migration):
+    """Replace (date_published, date_published_precision) with one partial-ISO CharField."""
+    dependencies = [
+        ("ask", "0014_merge_20260522_2257"),  # must match the 0014 merge migration's module name
+    ]
+
+    operations = [
+        migrations.AddField(  # step 1: temporary column on PDFResource
+            model_name="pdfresource",
+            name="date_published_iso",
+            field=models.CharField(blank=True, default="", max_length=10),
+        ),
+        migrations.AddField(  # step 1: temporary column on WebsiteResource
+            model_name="websiteresource",
+            name="date_published_iso",
+            field=models.CharField(blank=True, default="", max_length=10),
+        ),
+        migrations.RunPython(forwards, migrations.RunPython.noop),  # step 2: backfill; reverse is a no-op
+        migrations.RemoveField(model_name="pdfresource", name="date_published"),  # step 3: drop old fields
+        migrations.RemoveField(model_name="websiteresource", name="date_published"),
+        migrations.RemoveField(model_name="pdfresource", name="date_published_precision"),
+        migrations.RemoveField(model_name="websiteresource", name="date_published_precision"),
+        migrations.RenameField(  # step 4: temporary field takes over the canonical name
+            model_name="pdfresource",
+            old_name="date_published_iso",
+            new_name="date_published",
+        ),
+        migrations.RenameField(
+            model_name="websiteresource",
+            old_name="date_published_iso",
+            new_name="date_published",
+        ),
+    ]
diff --git a/hospexplorer/ask/tasks.py b/hospexplorer/ask/tasks.py
@@ -164,15 +164,20 @@ def _build_resource_metadata(obj):
     }
 
 
-def run_kb_resource_upload(model_label, resource_id):
+def run_kb_resource_upload(model_label, resource_id, replace=False):
     """Background thread: push a resource to the MCP KB and record its doc_id.
 
     Runs outside the admin's atomic save transaction so a slow or timing-out
     MCP call can't roll back the local row. The object's status/status_message
     are updated at each phase so the admin can surface progress and errors.
+
+    When ``replace`` is True and the row already has an ``mcp_kb_document_id``,
+    the existing KB doc is deleted before the new one is added — used by the
+    zip importer when an "update file" / "update metadata" re-upload would
+    otherwise leave the old chunks in the KB alongside the new ones.
     """
     from ask.models import WebsiteResource, PDFResource, Resource
-    from ask.kb_connector import add_pdf_to_kb, add_website_to_kb
+    from ask.kb_connector import add_pdf_to_kb, add_website_to_kb, delete_kb_document
 
     if model_label == "pdf":
         Model = PDFResource
@@ -188,6 +193,19 @@ def run_kb_resource_upload(model_label, resource_id):
         logger.error("run_kb_resource_upload: %s id=%s not found", model_label, resource_id)
         return
 
+    if replace and obj.mcp_kb_document_id:
+        # best-effort: if delete fails the re-add still happens, leaving a
+        # stale duplicate in the KB rather than losing the new upload
+        old_doc_id = obj.mcp_kb_document_id
+        try:
+            delete_kb_document(old_doc_id)
+        except Exception:
+            logger.warning(
+                "run_kb_resource_upload: failed to delete old KB doc_id=%s for %s id=%s; "
+                "re-adding anyway", old_doc_id, model_label, resource_id,
+            )
+        obj.mcp_kb_document_id = None
+
     try:
         metadata = _build_resource_metadata(obj)
         if model_label == "pdf":

diff --git a/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html b/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html
@@ -30,6 +30,18 @@
         <label for="zip_file">Zip file:</label>
         <input type="file" name="zip_file" id="zip_file" accept=".zip" required>
     </p>
+    <p>
+        <label>
+            <input type="checkbox" name="update_file" id="update_file">
+            Update file: overwrite existing PDFs (same filename and title) with the newly uploaded files.
+        </label>
+    </p>
+    <p>
+        <label>
+            <input type="checkbox" name="update_metadata" id="update_metadata">
+            Update metadata: refresh metadata on existing PDFs (same filename and title).
+        </label>
+    </p>
     <div class="submit-row">
         <input type="submit" value="Upload" class="default">
         <a href="{{ changelist_url }}" class="closelink">Cancel</a>