forked from D-Net/dnet-hadoop
author pids made unique by value
This commit is contained in:
parent
e265c3e125
commit
2e9e13444d
|
@ -1,9 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.clean;
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.clearspring.analytics.util.Lists;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
||||||
|
@ -13,6 +16,8 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
||||||
|
|
||||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
|
@ -139,6 +144,25 @@ public class CleaningFunctions {
|
||||||
author.setRank(i++);
|
author.setRank(i++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for(Author a : r.getAuthor()) {
|
||||||
|
if (Objects.isNull(a.getPid())) {
|
||||||
|
a.setPid(Lists.newArrayList());
|
||||||
|
} else {
|
||||||
|
a.setPid(
|
||||||
|
a.getPid().stream()
|
||||||
|
.filter(p -> Objects.nonNull(p.getQualifier()))
|
||||||
|
.filter(p -> StringUtils.isNotBlank(p.getValue()))
|
||||||
|
.map(p -> {
|
||||||
|
p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
|
||||||
|
return p;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toMap(StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1, LinkedHashMap::new))
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (value instanceof Publication) {
|
if (value instanceof Publication) {
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,28 @@
|
||||||
"schemename": "dnet:pid_types"
|
"schemename": "dnet:pid_types"
|
||||||
},
|
},
|
||||||
"value": "0000-0001-9613-6639"
|
"value": "0000-0001-9613-6639"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "ORCID12",
|
||||||
|
"classname": "ORCID12",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "https://orcid.org/0000-0001-9613-6639"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
|
@ -91,8 +113,7 @@
|
||||||
],
|
],
|
||||||
"fullname": "Barry, Peter S.",
|
"fullname": "Barry, Peter S.",
|
||||||
"name": "Peter S.",
|
"name": "Peter S.",
|
||||||
"pid": [
|
"pid": null,
|
||||||
],
|
|
||||||
"rank": 3,
|
"rank": 3,
|
||||||
"surname": "Barry"
|
"surname": "Barry"
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in New Issue