enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
3 changed files with 25 additions and 52 deletions
Showing only changes of commit 04bebb708c - Show all commits

View File

@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher<OaBrokerProject> {
super(20, super(20,
prj -> Topic.ENRICH_MISSING_PROJECT, prj -> Topic.ENRICH_MISSING_PROJECT,
(p, prj) -> p.getProjects().add(prj), (p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); prj -> prj.getOpenaireId());
} }
@Override @Override

View File

@ -18,31 +18,25 @@ public class EnrichMoreProject extends UpdateMatcher<OaBrokerProject> {
super(20, super(20,
prj -> Topic.ENRICH_MORE_PROJECT, prj -> Topic.ENRICH_MORE_PROJECT,
(p, prj) -> p.getProjects().add(prj), (p, prj) -> p.getProjects().add(prj),
prj -> projectAsString(prj)); prj -> prj.getOpenaireId());
}
private static String projectAsString(final OaBrokerProject prj) {
return prj.getFunder() + "::" + prj.getFundingProgram() + "::" + prj.getCode();
} }
@Override @Override
protected List<OaBrokerProject> findDifferences(final OaBrokerMainEntity source, protected List<OaBrokerProject> findDifferences(final OaBrokerMainEntity source,
final OaBrokerMainEntity target) { final OaBrokerMainEntity target) {
if (target.getProjects().size() >= BrokerConstants.MAX_LIST_SIZE) { if (target.getProjects().size() >= BrokerConstants.MAX_LIST_SIZE) { return new ArrayList<>(); }
return new ArrayList<>();
}
final Set<String> existingProjects = target final Set<String> existingProjects = target
.getProjects() .getProjects()
.stream() .stream()
.map(EnrichMoreProject::projectAsString) .map(p -> p.getOpenaireId())
.collect(Collectors.toSet()); .collect(Collectors.toSet());
return source return source
.getProjects() .getProjects()
.stream() .stream()
.filter(p -> !existingProjects.contains(projectAsString(p))) .filter(p -> !existingProjects.contains(p.getOpenaireId()))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }

View File

@ -44,9 +44,7 @@ public class ConversionUtils {
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) { public static List<OaBrokerInstance> oafInstanceToBrokerInstances(final Instance i) {
if (i == null) { if (i == null) { return new ArrayList<>(); }
return new ArrayList<>();
}
return mappedList(i.getUrl(), url -> { return mappedList(i.getUrl(), url -> {
final OaBrokerInstance res = new OaBrokerInstance(); final OaBrokerInstance res = new OaBrokerInstance();
@ -67,9 +65,7 @@ public class ConversionUtils {
} }
public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
if (d == null) { if (d == null) { return null; }
return null;
}
final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset(); final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
res.setOpenaireId(d.getId()); res.setOpenaireId(d.getId());
@ -82,9 +78,7 @@ public class ConversionUtils {
} }
public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) { public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) {
if (p == null) { if (p == null) { return null; }
return null;
}
final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication(); final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
res.setOpenaireId(p.getId()); res.setOpenaireId(p.getId());
@ -98,9 +92,7 @@ public class ConversionUtils {
} }
public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) {
if (result == null) { if (result == null) { return null; }
return null;
}
final OaBrokerMainEntity res = new OaBrokerMainEntity(); final OaBrokerMainEntity res = new OaBrokerMainEntity();
@ -117,8 +109,7 @@ public class ConversionUtils {
res.setEmbargoenddate(fieldValue(result.getEmbargoenddate())); res.setEmbargoenddate(fieldValue(result.getEmbargoenddate()));
res.setContributor(fieldList(result.getContributor())); res.setContributor(fieldList(result.getContributor()));
res res
.setJournal( .setJournal(result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
res.setCollectedFromId(mappedFirst(result.getCollectedfrom(), KeyValue::getKey)); res.setCollectedFromId(mappedFirst(result.getCollectedfrom(), KeyValue::getKey));
res.setCollectedFromName(mappedFirst(result.getCollectedfrom(), KeyValue::getValue)); res.setCollectedFromName(mappedFirst(result.getCollectedfrom(), KeyValue::getValue));
res.setPids(mappedList(result.getPid(), ConversionUtils::oafPidToBrokerPid)); res.setPids(mappedList(result.getPid(), ConversionUtils::oafPidToBrokerPid));
@ -130,9 +121,7 @@ public class ConversionUtils {
} }
private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) { private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
if (author == null) { if (author == null) { return null; }
return null;
}
final String pids = author.getPid() != null ? author final String pids = author.getPid() != null ? author
.getPid() .getPid()
@ -142,6 +131,7 @@ public class ConversionUtils {
.filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid() != null)
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid")) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
.map(pid -> pid.getValue()) .map(pid -> pid.getValue())
.map(pid -> cleanOrcid(pid))
.filter(StringUtils::isNotBlank) .filter(StringUtils::isNotBlank)
.findFirst() .findFirst()
.orElse(null) : null; .orElse(null) : null;
@ -149,10 +139,13 @@ public class ConversionUtils {
return new OaBrokerAuthor(author.getFullname(), pids); return new OaBrokerAuthor(author.getFullname(), pids);
} }
private static String cleanOrcid(final String s) {
final String match = "//orcid.org/";
return s.contains(match) ? StringUtils.substringAfter(s, match) : s;
}
private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) { private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) {
if (journal == null) { if (journal == null) { return null; }
return null;
}
final OaBrokerJournal res = new OaBrokerJournal(); final OaBrokerJournal res = new OaBrokerJournal();
res.setName(journal.getName()); res.setName(journal.getName());
@ -164,9 +157,7 @@ public class ConversionUtils {
} }
private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) { private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
if (ref == null) { if (ref == null) { return null; }
return null;
}
final OaBrokerExternalReference res = new OaBrokerExternalReference(); final OaBrokerExternalReference res = new OaBrokerExternalReference();
res.setRefidentifier(ref.getRefidentifier()); res.setRefidentifier(ref.getRefidentifier());
@ -177,9 +168,7 @@ public class ConversionUtils {
} }
public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { public static final OaBrokerProject oafProjectToBrokerProject(final Project p) {
if (p == null) { if (p == null) { return null; }
return null;
}
final OaBrokerProject res = new OaBrokerProject(); final OaBrokerProject res = new OaBrokerProject();
res.setOpenaireId(p.getId()); res.setOpenaireId(p.getId());
@ -203,9 +192,7 @@ public class ConversionUtils {
} }
public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) {
if (sw == null) { if (sw == null) { return null; }
return null;
}
final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware(); final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
res.setOpenaireId(sw.getId()); res.setOpenaireId(sw.getId());
@ -268,9 +255,7 @@ public class ConversionUtils {
} }
private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) { private static List<OaBrokerTypedValue> structPropTypedList(final List<StructuredProperty> list) {
if (list == null) { if (list == null) { return new ArrayList<>(); }
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -280,9 +265,7 @@ public class ConversionUtils {
} }
private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) { private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
if (list == null) { if (list == null) { return new ArrayList<>(); }
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -293,9 +276,7 @@ public class ConversionUtils {
} }
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) { private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
if (list == null) { if (list == null) { return new ArrayList<>(); }
return new ArrayList<>();
}
return list return list
.stream() .stream()
@ -307,9 +288,7 @@ public class ConversionUtils {
} }
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) { private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
if (list == null) { if (list == null) { return null; }
return null;
}
return list return list
.stream() .stream()