From 827e7e37db7fa9774bb4d692d9ed160ce2837f39 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 25 Mar 2021 11:07:59 +0100 Subject: [PATCH] [Cleaning] drop instance.alternateIdentifier elements when they are available among instance.pid --- .../dhp/schema/oaf/CleaningFunctions.java | 14 +--- .../oa/graph/clean/CleaningFunctionTest.java | 64 +++++++++++++++++-- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 44 +++++++++++++ 3 files changed, 107 insertions(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java index 412ed408e8..afbe0cff64 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java @@ -152,17 +152,9 @@ public class CleaningFunctions { Optional .ofNullable(i.getPid()) .ifPresent(pid -> { - final Set pids = Sets.newHashSet(i.getPid()); - i - .setAlternateIdentifier( - Optional - .ofNullable(i.getAlternateIdentifier()) - .map( - altId -> altId - .stream() - .filter(p -> !pids.contains(p)) - .collect(Collectors.toList())) - .orElse(Lists.newArrayList())); + final Set pids = Sets.newHashSet(pid); + final Set altIds = Sets.newHashSet(i.getAlternateIdentifier()); + i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids))); }); if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java index 0860c8bde2..fdbc58c179 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java @@ -87,11 +87,67 @@ public class CleaningFunctionTest { .map(p -> p.getQualifier()) .allMatch(q -> pidTerms.contains(q.getClassid()))); - Publication p_defaults = CleaningFunctions.cleanup(p_out); - assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid()); + List poi = p_out.getInstance(); + assertNotNull(poi); + assertEquals(1, poi.size()); + + final Instance poii = poi.get(0); + assertNotNull(poii); + assertNotNull(poii.getPid()); + + assertEquals(2, poii.getPid().size()); + + assertTrue( + poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); + assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + + assertNotNull(poii.getAlternateIdentifier()); + assertEquals(2, poii.getAlternateIdentifier().size()); + + assertTrue( + poii + .getAlternateIdentifier() + .stream() + .filter(s -> s.getValue().equals("10.1007/s109090161569x")) + .findFirst() + .isPresent()); + assertTrue( + poii + .getAlternateIdentifier() + .stream() + .filter(s -> s.getValue().equals("10.1009/qwerty")) + .findFirst() + .isPresent()); + + Publication p_cleaned = CleaningFunctions.cleanup(p_out); + assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid()); assertNull(p_out.getPublisher()); - getAuthorPids(p_defaults).forEach(pid -> { + final List pci = p_cleaned.getInstance(); + assertNotNull(pci); + assertEquals(1, pci.size()); + + final Instance pcii = pci.get(0); + assertNotNull(pcii); + assertNotNull(pcii.getPid()); + + assertEquals(2, pcii.getPid().size()); + + assertTrue( + pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); + assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + + assertNotNull(pcii.getAlternateIdentifier()); + assertEquals(1, pcii.getAlternateIdentifier().size()); + assertTrue( + pcii + .getAlternateIdentifier() + .stream() + .filter(s -> s.getValue().equals("10.1009/qwerty")) + .findFirst() + .isPresent()); + + getAuthorPids(p_cleaned).forEach(pid -> { System.out .println( String @@ -101,7 +157,7 @@ public class CleaningFunctionTest { }); // TODO add more assertions to verity the cleaned values - System.out.println(MAPPER.writeValueAsString(p_out)); + System.out.println(MAPPER.writeValueAsString(p_cleaned)); /* * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index e746d236e4..23de2ef86b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -318,6 +318,50 @@ "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", "instance": [ { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], "accessright": { "classid": "CLOSED", "classname": "CLOSED",