[Cleaning] drop instance.alternateIdentifier elements when they are available among instance.pid

This commit is contained in:
Claudio Atzori 2021-03-25 11:07:59 +01:00
parent 751125fdf9
commit 827e7e37db
3 changed files with 107 additions and 15 deletions

View File

@ -152,17 +152,9 @@ public class CleaningFunctions {
Optional Optional
.ofNullable(i.getPid()) .ofNullable(i.getPid())
.ifPresent(pid -> { .ifPresent(pid -> {
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid()); final Set<StructuredProperty> pids = Sets.newHashSet(pid);
i final Set<StructuredProperty> altIds = Sets.newHashSet(i.getAlternateIdentifier());
.setAlternateIdentifier( i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
Optional
.ofNullable(i.getAlternateIdentifier())
.map(
altId -> altId
.stream()
.filter(p -> !pids.contains(p))
.collect(Collectors.toList()))
.orElse(Lists.newArrayList()));
}); });
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {

View File

@ -87,11 +87,67 @@ public class CleaningFunctionTest {
.map(p -> p.getQualifier()) .map(p -> p.getQualifier())
.allMatch(q -> pidTerms.contains(q.getClassid()))); .allMatch(q -> pidTerms.contains(q.getClassid())));
Publication p_defaults = CleaningFunctions.cleanup(p_out); List<Instance> poi = p_out.getInstance();
assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid()); assertNotNull(poi);
assertEquals(1, poi.size());
final Instance poii = poi.get(0);
assertNotNull(poii);
assertNotNull(poii.getPid());
assertEquals(2, poii.getPid().size());
assertTrue(
poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
assertNotNull(poii.getAlternateIdentifier());
assertEquals(2, poii.getAlternateIdentifier().size());
assertTrue(
poii
.getAlternateIdentifier()
.stream()
.filter(s -> s.getValue().equals("10.1007/s109090161569x"))
.findFirst()
.isPresent());
assertTrue(
poii
.getAlternateIdentifier()
.stream()
.filter(s -> s.getValue().equals("10.1009/qwerty"))
.findFirst()
.isPresent());
Publication p_cleaned = CleaningFunctions.cleanup(p_out);
assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid());
assertNull(p_out.getPublisher()); assertNull(p_out.getPublisher());
getAuthorPids(p_defaults).forEach(pid -> { final List<Instance> pci = p_cleaned.getInstance();
assertNotNull(pci);
assertEquals(1, pci.size());
final Instance pcii = pci.get(0);
assertNotNull(pcii);
assertNotNull(pcii.getPid());
assertEquals(2, pcii.getPid().size());
assertTrue(
pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
assertNotNull(pcii.getAlternateIdentifier());
assertEquals(1, pcii.getAlternateIdentifier().size());
assertTrue(
pcii
.getAlternateIdentifier()
.stream()
.filter(s -> s.getValue().equals("10.1009/qwerty"))
.findFirst()
.isPresent());
getAuthorPids(p_cleaned).forEach(pid -> {
System.out System.out
.println( .println(
String String
@ -101,7 +157,7 @@ public class CleaningFunctionTest {
}); });
// TODO add more assertions to verify the cleaned values // TODO add more assertions to verify the cleaned values
System.out.println(MAPPER.writeValueAsString(p_out)); System.out.println(MAPPER.writeValueAsString(p_cleaned));
/* /*
* assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));

View File

@ -318,6 +318,50 @@
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
"instance": [ "instance": [
{ {
"pid": [
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1007/s109090161569x"
},
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1008/abcd"
}
],
"alternateIdentifier": [
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1007/s109090161569x"
},
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1009/qwerty"
}
],
"accessright": { "accessright": {
"classid": "CLOSED", "classid": "CLOSED",
"classname": "CLOSED", "classname": "CLOSED",