From 64d27574c4a40ce459fbac8560691f361533f422 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 30 Jun 2026 13:57:47 +0200 Subject: [PATCH] Issue 1361 fix: DOI Organizer creates duplicate dc.identifier.doi metadata (ufal/clarin-dspace#1368) * Issue 1361 fix: DOI Organizer creates duplicate dc.identifier.doi metadata * copilot comments * Issue 1361: make DOI metadata save additive, report duplicates via QA saveDOIToObject now adds dc.identifier.doi only when that exact value is not already present, and no longer deletes metadata. The previous fix cleared existing values when more than one was found or when a different DOI was present; since this method runs after the DOI has already been registered with the external agency, silently dropping a (possibly legacy/citable) identifier is lossy and irreversible. The operation stays idempotent, so re-registration no longer creates duplicate values. Items that legitimately end up with more than one dc.identifier.doi value are now surfaced for manual review by adding dc.identifier.doi to the ItemMetadataQAChecker noDuplicate list (metadataqa curation task) rather than being cleaned up silently in the write path. Tests: flip the replace test to assert a pre-existing different DOI is preserved alongside the new one, keep the idempotency test, and add a QA checker IT asserting an item with two DOIs fails curation. * local field renaming --------- Co-authored-by: Ondrej Kosarko (cherry picked from commit 3b7db4ca38c5bd7b047cb2861d8ee23fbec215e4) --- .../ctask/general/ItemMetadataQAChecker.java | 1 + .../identifier/DOIIdentifierProvider.java | 25 ++++-- .../curate/ItemMetadataQACheckerIT.java | 22 ++++++ .../identifier/DOIIdentifierProviderTest.java | 76 +++++++++++++++++++ 4 files changed, 118 insertions(+), 6 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java b/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java index c4e5ef709b67..2f813b01f232 100644 --- a/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java +++ b/dspace-api/src/main/java/org/dspace/ctask/general/ItemMetadataQAChecker.java @@ -87,6 +87,7 @@ public void init(Curator curator, String taskId) throws IOException { "dc.rights.label", "dc.date.available", "dc.source.uri", + "dc.identifier.doi", "metashare.ResourceInfo#DistributionInfo#LicenseInfo.license" }); strangeMetadata = configurationService.getArrayProperty("lr.curation.metadata.strange", new String[]{ diff --git a/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java b/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java index dd48131b1fdf..ff5097aa5e5d 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java +++ b/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java @@ -1067,13 +1067,26 @@ protected void saveDOIToObject(Context context, DSpaceObject dso, String doi) } Item item = (Item) dso; - itemService.addMetadata(context, item, MD_SCHEMA, DOI_ELEMENT, DOI_QUALIFIER, null, - doiService.DOIToExternalForm(doi)); - try { - itemService.update(context, item); - } catch (SQLException | AuthorizeException ex) { - throw ex; + String doiURL = doiService.DOIToExternalForm(doi); + + // Add the DOI to the metadata only if this exact value is not present yet. This keeps the operation + // idempotent (re-registration does not create duplicate values) without ever deleting metadata: a + // pre-existing, different DOI is left untouched. This method is called after the DOI has already been + // registered with the external agency, so destroying metadata here would be lossy and irreversible. + // Items that end up with more than one dc.identifier.doi value are surfaced by the ItemMetadataQAChecker + // curation task for manual review. + List existing = itemService.getMetadata(item, MD_SCHEMA, DOI_ELEMENT, DOI_QUALIFIER, Item.ANY); + boolean alreadyPresent = existing.stream() + .anyMatch(metadataValue -> doiURL.equals(metadataValue.getValue())); + + if (alreadyPresent) { + log.debug("The DOI {} is already part of the metadata of Item {}. Not adding it again.", + doi, item.getID()); + return; } + + itemService.addMetadata(context, item, MD_SCHEMA, DOI_ELEMENT, DOI_QUALIFIER, null, doiURL); + itemService.update(context, item); } /** diff --git a/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java b/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java index 315a04a68b11..446e5e809daf 100644 --- a/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java +++ b/dspace-api/src/test/java/org/dspace/curate/ItemMetadataQACheckerIT.java @@ -63,6 +63,7 @@ public class ItemMetadataQACheckerIT extends AbstractIntegrationTestWithDatabase Item itemWithIncorrectLanguageName; Item itemWithTwoAvailableDates; Item itemWithTwoAvailableDatesAndLang; + Item itemWithTwoDois; Item itemVersion1; Item itemVersion2; Item itemVersion3; @@ -146,6 +147,13 @@ public void setUp() throws Exception { itemService.addMetadata(context, itemWithTwoAvailableDatesAndLang,"dc", "date", "available", "en_US", "2021-01-01"); + itemWithTwoDois = ItemBuilder.createItem(context, collection) + .withTitle("Item With Two DOIs") + .withMetadata("dc", "type", null, "corpus") + .withMetadata("dc", "identifier", "doi", "https://doi.org/10.5072/test-1") + .withMetadata("dc", "identifier", "doi", "https://doi.org/10.5072/test-2") + .build(); + itemVersion1 = ItemBuilder.createItem(context, collection) .withTitle("Item Version 1") .withMetadata("dc", "type", null, "corpus") @@ -233,6 +241,20 @@ public void testItemWithTwoAvailableDatesAndLang() throws IOException { assertTrue("Result should mention multiple dc.date.available", result.contains("dc.date.available")); } + @Test + public void testItemWithTwoDois() throws IOException { + Curator curator = new Curator(); + curator.addTask(TASK_NAME); + context.setCurrentUser(admin); + + // Run curator task for item with two dc.identifier.doi - should fail + curator.curate(context, itemWithTwoDois.getHandle()); + int status = curator.getStatus(TASK_NAME); + assertEquals("Curation should fail for item with two dc.identifier.doi", Curator.CURATE_FAIL, status); + String result = curator.getResult(TASK_NAME); + assertTrue("Result should mention multiple dc.identifier.doi", result.contains("dc.identifier.doi")); + } + @Test public void testValidItem() throws IOException { Curator curator = new Curator(); diff --git a/dspace-api/src/test/java/org/dspace/identifier/DOIIdentifierProviderTest.java b/dspace-api/src/test/java/org/dspace/identifier/DOIIdentifierProviderTest.java index 43f93e606dfd..7ef10de02311 100644 --- a/dspace-api/src/test/java/org/dspace/identifier/DOIIdentifierProviderTest.java +++ b/dspace-api/src/test/java/org/dspace/identifier/DOIIdentifierProviderTest.java @@ -23,6 +23,7 @@ import java.util.Date; import java.util.List; import java.util.Random; +import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.ObjectUtils; @@ -332,6 +333,51 @@ public void testStore_DOI_as_item_metadata() assertTrue("Cannot store DOI as item metadata value.", result); } + @Test + public void testStore_DOI_keeps_existing_different_doi_metadata() throws SQLException, AuthorizeException, + IOException, IdentifierException, IllegalAccessException, WorkflowException { + Item item = newItem(); + + // this checks that the method does not fail if there is already a *different* DOI in the metadata, + // here we verify that the existing DOI is preserved (not deleted) and the new one is added alongside it. + // Items with more than one DOI are reported by the ItemMetadataQAChecker curation task, not silently + // cleaned up here. + String oldDoi = DOI.SCHEME + PREFIX + "/" + NAMESPACE_SEPARATOR + "1234"; + String newDoi = DOI.SCHEME + PREFIX + "/" + NAMESPACE_SEPARATOR + Long.toHexString(new Date().getTime()); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, item, DOIIdentifierProvider.MD_SCHEMA, + DOIIdentifierProvider.DOI_ELEMENT, + DOIIdentifierProvider.DOI_QUALIFIER, + null, + doiService.DOIToExternalForm(oldDoi)); + provider.saveDOIToObject(context, item, newDoi); + context.restoreAuthSystemState(); + + checkDoiMetadata(item, oldDoi, newDoi); + } + + @Test + public void testStore_DOI_check_single_doi_metadata() throws SQLException, AuthorizeException, IOException, + IdentifierException, IllegalAccessException, WorkflowException { + Item item = newItem(); + + // this checks that the method does not fail if there is already a DOI in the metadata, + // here we check if DOI metadata are not duplicated + String doi = DOI.SCHEME + PREFIX + "/" + NAMESPACE_SEPARATOR + Long.toHexString(new Date().getTime()); + + context.turnOffAuthorisationSystem(); + itemService.addMetadata(context, item, DOIIdentifierProvider.MD_SCHEMA, + DOIIdentifierProvider.DOI_ELEMENT, + DOIIdentifierProvider.DOI_QUALIFIER, + null, + doiService.DOIToExternalForm(doi)); + provider.saveDOIToObject(context, item, doi); + context.restoreAuthSystemState(); + + checkSingleDoiMetadata(item, doi); + } + @Test public void testGet_DOI_out_of_item_metadata() throws SQLException, AuthorizeException, IOException, IdentifierException, IllegalAccessException, @@ -868,4 +914,34 @@ public void testLoadOrCreateDOIReturnsMintedStatus() // registerOnline // reserveOnline + private void checkSingleDoiMetadata(Item item, String doi) throws IdentifierException { + List metadata = itemService.getMetadata(item, DOIIdentifierProvider.MD_SCHEMA, + DOIIdentifierProvider.DOI_ELEMENT, + DOIIdentifierProvider.DOI_QUALIFIER, + Item.ANY); + boolean result = false; + if (metadata.size() == 1 && metadata.get(0).getValue().equals(doiService.DOIToExternalForm(doi))) { + result = true; + } + assertTrue("Invalid or duplicate 'dc.identifier.doi' metadata value(s).", result); + } + + private void checkDoiMetadata(Item item, String... dois) throws IdentifierException { + List values = itemService.getMetadata(item, DOIIdentifierProvider.MD_SCHEMA, + DOIIdentifierProvider.DOI_ELEMENT, + DOIIdentifierProvider.DOI_QUALIFIER, + Item.ANY) + .stream() + .map(MetadataValue::getValue) + .collect(Collectors.toList()); + + List expected = new ArrayList<>(); + for (String doi : dois) { + expected.add(doiService.DOIToExternalForm(doi)); + } + + assertEquals("Unexpected number of 'dc.identifier.doi' metadata values.", expected.size(), values.size()); + assertTrue("Expected 'dc.identifier.doi' metadata values are missing.", values.containsAll(expected)); + } + }