forked from D-Net/dnet-hadoop
[Cleaning] drop instance.alternateIdentifier elements when they are already present among instance.pid
This commit is contained in:
parent
751125fdf9
commit
827e7e37db
|
@ -152,17 +152,9 @@ public class CleaningFunctions {
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(i.getPid())
|
.ofNullable(i.getPid())
|
||||||
.ifPresent(pid -> {
|
.ifPresent(pid -> {
|
||||||
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid());
|
final Set<StructuredProperty> pids = Sets.newHashSet(pid);
|
||||||
i
|
final Set<StructuredProperty> altIds = Sets.newHashSet(i.getAlternateIdentifier());
|
||||||
.setAlternateIdentifier(
|
i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
|
||||||
Optional
|
|
||||||
.ofNullable(i.getAlternateIdentifier())
|
|
||||||
.map(
|
|
||||||
altId -> altId
|
|
||||||
.stream()
|
|
||||||
.filter(p -> !pids.contains(p))
|
|
||||||
.collect(Collectors.toList()))
|
|
||||||
.orElse(Lists.newArrayList()));
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
||||||
|
|
|
@ -87,11 +87,67 @@ public class CleaningFunctionTest {
|
||||||
.map(p -> p.getQualifier())
|
.map(p -> p.getQualifier())
|
||||||
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||||
|
|
||||||
Publication p_defaults = CleaningFunctions.cleanup(p_out);
|
List<Instance> poi = p_out.getInstance();
|
||||||
assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid());
|
assertNotNull(poi);
|
||||||
|
assertEquals(1, poi.size());
|
||||||
|
|
||||||
|
final Instance poii = poi.get(0);
|
||||||
|
assertNotNull(poii);
|
||||||
|
assertNotNull(poii.getPid());
|
||||||
|
|
||||||
|
assertEquals(2, poii.getPid().size());
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||||
|
assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||||
|
|
||||||
|
assertNotNull(poii.getAlternateIdentifier());
|
||||||
|
assertEquals(2, poii.getAlternateIdentifier().size());
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
poii
|
||||||
|
.getAlternateIdentifier()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> s.getValue().equals("10.1007/s109090161569x"))
|
||||||
|
.findFirst()
|
||||||
|
.isPresent());
|
||||||
|
assertTrue(
|
||||||
|
poii
|
||||||
|
.getAlternateIdentifier()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||||
|
.findFirst()
|
||||||
|
.isPresent());
|
||||||
|
|
||||||
|
Publication p_cleaned = CleaningFunctions.cleanup(p_out);
|
||||||
|
assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid());
|
||||||
assertNull(p_out.getPublisher());
|
assertNull(p_out.getPublisher());
|
||||||
|
|
||||||
getAuthorPids(p_defaults).forEach(pid -> {
|
final List<Instance> pci = p_cleaned.getInstance();
|
||||||
|
assertNotNull(pci);
|
||||||
|
assertEquals(1, pci.size());
|
||||||
|
|
||||||
|
final Instance pcii = pci.get(0);
|
||||||
|
assertNotNull(pcii);
|
||||||
|
assertNotNull(pcii.getPid());
|
||||||
|
|
||||||
|
assertEquals(2, pcii.getPid().size());
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||||
|
assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||||
|
|
||||||
|
assertNotNull(pcii.getAlternateIdentifier());
|
||||||
|
assertEquals(1, pcii.getAlternateIdentifier().size());
|
||||||
|
assertTrue(
|
||||||
|
pcii
|
||||||
|
.getAlternateIdentifier()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||||
|
.findFirst()
|
||||||
|
.isPresent());
|
||||||
|
|
||||||
|
getAuthorPids(p_cleaned).forEach(pid -> {
|
||||||
System.out
|
System.out
|
||||||
.println(
|
.println(
|
||||||
String
|
String
|
||||||
|
@ -101,7 +157,7 @@ public class CleaningFunctionTest {
|
||||||
});
|
});
|
||||||
|
|
||||||
// TODO add more assertions to verify the cleaned values
|
// TODO add more assertions to verify the cleaned values
|
||||||
System.out.println(MAPPER.writeValueAsString(p_out));
|
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
|
* assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
|
||||||
|
|
|
@ -318,6 +318,50 @@
|
||||||
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||||
"instance": [
|
"instance": [
|
||||||
{
|
{
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1008/abcd"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"alternateIdentifier": [
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1009/qwerty"
|
||||||
|
}
|
||||||
|
],
|
||||||
"accessright": {
|
"accessright": {
|
||||||
"classid": "CLOSED",
|
"classid": "CLOSED",
|
||||||
"classname": "CLOSED",
|
"classname": "CLOSED",
|
||||||
|
|
Loading…
Reference in New Issue