enrichment steps #38
|
@ -1,41 +1,68 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.broker.model.Topic;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.config.DedupConfig;
|
import eu.dnetlib.pace.config.DedupConfig;
|
||||||
|
|
||||||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
|
public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
|
||||||
|
|
||||||
public EnrichMissingAuthorOrcid() {
|
public EnrichMissingAuthorOrcid() {
|
||||||
super(true);
|
super(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final ResultWithRelations source,
|
protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source,
|
||||||
final ResultWithRelations target,
|
final ResultWithRelations target,
|
||||||
final DedupConfig dedupConfig) {
|
final DedupConfig dedupConfig) {
|
||||||
// TODO
|
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
final Set<String> existingOrcids = target
|
||||||
return Arrays.asList();
|
.getResult()
|
||||||
|
.getAuthor()
|
||||||
|
.stream()
|
||||||
|
.map(Author::getPid)
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
|
||||||
|
.map(pid -> pid.getValue())
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
|
final List<UpdateInfo<String>> list = new ArrayList<>();
|
||||||
|
|
||||||
|
for (final Author author : source.getResult().getAuthor()) {
|
||||||
|
final String name = author.getFullname();
|
||||||
|
|
||||||
|
for (final StructuredProperty pid : author.getPid()) {
|
||||||
|
if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
|
||||||
|
&& !existingOrcids.contains(pid.getValue())) {
|
||||||
|
list
|
||||||
|
.add(
|
||||||
|
generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig));
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
|
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
|
||||||
final ResultWithRelations source,
|
final ResultWithRelations source,
|
||||||
final ResultWithRelations target,
|
final ResultWithRelations target,
|
||||||
final DedupConfig dedupConfig) {
|
final DedupConfig dedupConfig) {
|
||||||
return new UpdateInfo<>(
|
return new UpdateInfo<>(
|
||||||
Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
||||||
highlightValue, source, target,
|
highlightValue, source, target,
|
||||||
(p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()),
|
(p, aut) -> p.getCreators().add(aut),
|
||||||
pair -> pair.getLeft() + "::" + pair.getRight(),
|
aut -> aut,
|
||||||
dedupConfig);
|
dedupConfig);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue