forked from D-Net/dnet-hadoop
[Cleaning] drop instance.alternateIdentifier elements when they are already present in instance.pid
This commit is contained in:
parent
751125fdf9
commit
827e7e37db
|
@ -152,17 +152,9 @@ public class CleaningFunctions {
|
|||
Optional
|
||||
.ofNullable(i.getPid())
|
||||
.ifPresent(pid -> {
|
||||
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid());
|
||||
i
|
||||
.setAlternateIdentifier(
|
||||
Optional
|
||||
.ofNullable(i.getAlternateIdentifier())
|
||||
.map(
|
||||
altId -> altId
|
||||
.stream()
|
||||
.filter(p -> !pids.contains(p))
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(Lists.newArrayList()));
|
||||
final Set<StructuredProperty> pids = Sets.newHashSet(pid);
|
||||
final Set<StructuredProperty> altIds = Sets.newHashSet(i.getAlternateIdentifier());
|
||||
i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
|
||||
});
|
||||
|
||||
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
||||
|
|
|
@ -87,11 +87,67 @@ public class CleaningFunctionTest {
|
|||
.map(p -> p.getQualifier())
|
||||
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||
|
||||
Publication p_defaults = CleaningFunctions.cleanup(p_out);
|
||||
assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid());
|
||||
List<Instance> poi = p_out.getInstance();
|
||||
assertNotNull(poi);
|
||||
assertEquals(1, poi.size());
|
||||
|
||||
final Instance poii = poi.get(0);
|
||||
assertNotNull(poii);
|
||||
assertNotNull(poii.getPid());
|
||||
|
||||
assertEquals(2, poii.getPid().size());
|
||||
|
||||
assertTrue(
|
||||
poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||
assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||
|
||||
assertNotNull(poii.getAlternateIdentifier());
|
||||
assertEquals(2, poii.getAlternateIdentifier().size());
|
||||
|
||||
assertTrue(
|
||||
poii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1007/s109090161569x"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
assertTrue(
|
||||
poii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
|
||||
Publication p_cleaned = CleaningFunctions.cleanup(p_out);
|
||||
assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid());
|
||||
assertNull(p_out.getPublisher());
|
||||
|
||||
getAuthorPids(p_defaults).forEach(pid -> {
|
||||
final List<Instance> pci = p_cleaned.getInstance();
|
||||
assertNotNull(pci);
|
||||
assertEquals(1, pci.size());
|
||||
|
||||
final Instance pcii = pci.get(0);
|
||||
assertNotNull(pcii);
|
||||
assertNotNull(pcii.getPid());
|
||||
|
||||
assertEquals(2, pcii.getPid().size());
|
||||
|
||||
assertTrue(
|
||||
pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||
assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||
|
||||
assertNotNull(pcii.getAlternateIdentifier());
|
||||
assertEquals(1, pcii.getAlternateIdentifier().size());
|
||||
assertTrue(
|
||||
pcii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
|
||||
getAuthorPids(p_cleaned).forEach(pid -> {
|
||||
System.out
|
||||
.println(
|
||||
String
|
||||
|
@ -101,7 +157,7 @@ public class CleaningFunctionTest {
|
|||
});
|
||||
|
||||
// TODO add more assertions to verify the cleaned values
|
||||
System.out.println(MAPPER.writeValueAsString(p_out));
|
||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||
|
||||
/*
|
||||
* assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
|
||||
|
|
|
@ -318,6 +318,50 @@
|
|||
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||
"instance": [
|
||||
{
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": null,
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1007/s109090161569x"
|
||||
},
|
||||
{
|
||||
"dataInfo": null,
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1008/abcd"
|
||||
}
|
||||
],
|
||||
"alternateIdentifier": [
|
||||
{
|
||||
"dataInfo": null,
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1007/s109090161569x"
|
||||
},
|
||||
{
|
||||
"dataInfo": null,
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1009/qwerty"
|
||||
}
|
||||
],
|
||||
"accessright": {
|
||||
"classid": "CLOSED",
|
||||
"classname": "CLOSED",
|
||||
|
|
Loading…
Reference in New Issue