1
0
Fork 0

partial implemantation of broker events generation

This commit is contained in:
Michele Artini 2020-06-05 11:43:00 +02:00
parent 7e82996e7c
commit a73973a74b
5 changed files with 128 additions and 66 deletions

View File

@ -57,7 +57,7 @@
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-broker-common</artifactId> <artifactId>dnet-openaire-broker-common</artifactId>
<version>[2.0.0,3.0.0)</version> <version>[2.0.1,3.0.0)</version>
</dependency> </dependency>
</dependencies> </dependencies>

View File

@ -12,7 +12,6 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.lang3.time.DateUtils;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;
@ -37,14 +36,12 @@ public class EventFactory {
final Map<String, Object> map = createMapFromResult(updateInfo); final Map<String, Object> map = createMapFromResult(updateInfo);
final String payload = createPayload(updateInfo);
final String eventId = final String eventId =
calculateEventId(updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), updateInfo.getHighlightValueAsString()); calculateEventId(updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), updateInfo.getHighlightValueAsString());
res.setEventId(eventId); res.setEventId(eventId);
res.setProducerId(PRODUCER_ID); res.setProducerId(PRODUCER_ID);
res.setPayload(payload); res.setPayload(updateInfo.asBrokerPayload().toJSON());
res.setMap(map); res.setMap(map);
res.setTopic(updateInfo.getTopicPath()); res.setTopic(updateInfo.getTopicPath());
res.setCreationDate(now); res.setCreationDate(now);
@ -53,15 +50,6 @@ public class EventFactory {
return res; return res;
} }
private static String createPayload(final UpdateInfo<?> updateInfo) {
final OpenAireEventPayload payload = new OpenAireEventPayload();
// TODO : use ConversionUtils
updateInfo.compileHighlight(payload);
return payload.toJSON();
}
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) { private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>(); final Map<String, Object> map = new HashMap<>();

View File

@ -39,7 +39,7 @@ public abstract class AbstractEnrichMissingPublication
.getRight() .getRight()
.stream() .stream()
.filter(d -> !existingPublications.contains(d.getId())) .filter(d -> !existingPublications.contains(d.getId()))
.map(ConversionUtils::oafPublicationToBrokerPublication) .map(ConversionUtils::oafResultToBrokerPublication)
.map(i -> generateUpdateInfo(i, source, target)) .map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList()); .collect(Collectors.toList());

View File

@ -5,83 +5,130 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ConversionUtils { public class ConversionUtils {
public static List<Instance> oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
public static List<eu.dnetlib.broker.objects.Instance> oafInstanceToBrokerInstances(final Instance i) {
return i.getUrl().stream().map(url -> { return i.getUrl().stream().map(url -> {
final Instance r = new Instance(); return new eu.dnetlib.broker.objects.Instance()
r.setUrl(url); .setUrl(url)
r.setInstancetype(i.getInstancetype().getClassid()); .setInstancetype(i.getInstancetype().getClassid())
r.setLicense(BrokerConstants.OPEN_ACCESS); .setLicense(BrokerConstants.OPEN_ACCESS)
r.setHostedby(i.getHostedby().getValue()); .setHostedby(i.getHostedby().getValue());
return r;
}).collect(Collectors.toList()); }).collect(Collectors.toList());
} }
public static Pid oafPidToBrokerPid(final StructuredProperty sp) { public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
final Pid pid = new Pid(); return sp != null ? new Pid()
pid.setValue(sp.getValue()); .setValue(sp.getValue())
pid.setType(sp.getQualifier().getClassid()); .setType(sp.getQualifier().getClassid()) : null;
return pid;
} }
public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) { public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
return Pair.of(sp.getQualifier().getClassid(), sp.getValue()); return sp != null ? Pair.of(sp.getQualifier().getClassid(), sp.getValue()) : null;
} }
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) { public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset(); return d != null ? new eu.dnetlib.broker.objects.Dataset()
res.setOriginalId(d.getOriginalId().get(0)); .setOriginalId(d.getOriginalId().get(0))
res.setTitles(structPropList(d.getTitle())); .setTitles(structPropList(d.getTitle()))
res.setPids(d.getPid().stream().map(ConversionUtils::structeredPropertyToPid).collect(Collectors.toList())); .setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
res.setInstances(d.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList())); .setInstances(d.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList()))
res.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList())); .setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
return res; : null;
} }
public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) { public static final eu.dnetlib.broker.objects.Publication oafResultToBrokerPublication(final Result result) {
final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication();
// TODO This should be reusable return result != null ? new eu.dnetlib.broker.objects.Publication()
return res; .setOriginalId(result.getOriginalId().get(0))
.setTitles(structPropList(result.getTitle()))
.setAbstracts(fieldList(result.getDescription()))
.setLanguage(result.getLanguage().getClassid())
.setSubjects(structPropList(result.getSubject()))
.setCreators(result.getAuthor().stream().map(Author::getFullname).collect(Collectors.toList()))
.setPublicationdate(result.getDateofcollection())
.setPublisher(fieldValue(result.getPublisher()))
.setEmbargoenddate(fieldValue(result.getEmbargoenddate()))
.setContributor(fieldList(result.getContributor()))
.setJournal(result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null)
.setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
.setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
.setInstances(result.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList()))
.setExternalReferences(result.getExternalReference().stream().map(ConversionUtils::oafExtRefToBrokerExtRef).collect(Collectors.toList()))
: null;
}
private static eu.dnetlib.broker.objects.Journal oafJournalToBrokerJournal(final Journal journal) {
return journal != null ? new eu.dnetlib.broker.objects.Journal()
.setName(journal.getName())
.setIssn(journal.getIssnPrinted())
.setEissn(journal.getIssnOnline())
.setLissn(journal.getIssnLinking()) : null;
}
private static eu.dnetlib.broker.objects.ExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
return ref != null ? new eu.dnetlib.broker.objects.ExternalReference()
.setRefidentifier(ref.getRefidentifier())
.setSitename(ref.getSitename())
.setType(ref.getQualifier().getClassid())
.setUrl(ref.getUrl())
: null;
} }
public static final eu.dnetlib.broker.objects.Project oafProjectToBrokerProject(final Project p) { public static final eu.dnetlib.broker.objects.Project oafProjectToBrokerProject(final Project p) {
final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project(); if (p == null) { return null; }
res.setTitle(fieldValue(p.getTitle()));
res.setAcronym(fieldValue(p.getAcronym())); final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project()
res.setCode(fieldValue(p.getCode())); .setTitle(fieldValue(p.getTitle()))
res.setFunder(null); // TODO .setAcronym(fieldValue(p.getAcronym()))
res.setFundingProgram(null); // TODO .setCode(fieldValue(p.getCode()));
res.setJurisdiction(null); // TODO
final String ftree = fieldValue(p.getFundingtree());
if (StringUtils.isNotBlank(ftree)) {
try {
final Document fdoc = DocumentHelper.parseText(ftree);
res.setFunder(fdoc.valueOf("/fundingtree/funder/shortname"));
res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction"));
res.setFundingProgram(fdoc.valueOf("//funding_level_0/name"));
} catch (final DocumentException e) {
log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree);
}
}
return res; return res;
} }
public static final eu.dnetlib.broker.objects.Software oafSoftwareToBrokerSoftware(final Software sw) { public static final eu.dnetlib.broker.objects.Software oafSoftwareToBrokerSoftware(final Software sw) {
final eu.dnetlib.broker.objects.Software res = new eu.dnetlib.broker.objects.Software(); return sw != null ? new eu.dnetlib.broker.objects.Software()
res.setName(structPropValue(sw.getTitle())); .setName(structPropValue(sw.getTitle()))
res.setDescription(fieldValue(sw.getDescription())); .setDescription(fieldValue(sw.getDescription()))
res.setRepository(fieldValue(sw.getCodeRepositoryUrl())); .setRepository(fieldValue(sw.getCodeRepositoryUrl()))
res.setLandingPage(fieldValue(sw.getDocumentationUrl())); .setLandingPage(fieldValue(sw.getDocumentationUrl()))
return res; : null;
}
private static Pid structeredPropertyToPid(final StructuredProperty sp) {
final Pid pid = new Pid();
pid.setValue(sp.getValue());
pid.setType(sp.getQualifier().getClassid());
return pid;
} }
private static String fieldValue(final Field<String> f) { private static String fieldValue(final Field<String> f) {
@ -89,14 +136,20 @@ public class ConversionUtils {
} }
private static String fieldValue(final List<Field<String>> fl) { private static String fieldValue(final List<Field<String>> fl) {
return fl != null && !fl.isEmpty() && fl.get(0) != null ? fl.get(0).getValue() : null; return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null;
} }
private static String structPropValue(final List<StructuredProperty> props) { private static String structPropValue(final List<StructuredProperty> props) {
return props != null && !props.isEmpty() && props.get(0) != null ? props.get(0).getValue() : null; return props != null ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null;
}
private static List<String> fieldList(final List<Field<String>> fl) {
return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList())
: new ArrayList<>();
} }
private static List<String> structPropList(final List<StructuredProperty> props) { private static List<String> structPropList(final List<StructuredProperty> props) {
return props != null ? props.stream().map(StructuredProperty::getValue).collect(Collectors.toList()) : new ArrayList<>(); return props != null ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList())
: new ArrayList<>();
} }
} }

View File

@ -1,12 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.List;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;
import java.util.function.Function; import java.util.function.Function;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Provenance;
import eu.dnetlib.broker.objects.Publication; import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public final class UpdateInfo<T> { public final class UpdateInfo<T> {
@ -66,12 +70,29 @@ public final class UpdateInfo<T> {
return trust; return trust;
} }
public void compileHighlight(final OpenAireEventPayload payload) {
compileHighlight.accept(payload.getHighlight(), getHighlightValue());
}
public String getHighlightValueAsString() { public String getHighlightValueAsString() {
return highlightToString.apply(getHighlightValue()); return highlightToString.apply(getHighlightValue());
} }
public OpenAireEventPayload asBrokerPayload() {
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource());
compileHighlight.accept(p, getHighlightValue());
final Publication hl = new Publication();
compileHighlight.accept(hl, getHighlightValue());
final String provId = getSource().getOriginalId().stream().findFirst().orElse(null);
final String provRepo = getSource().getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null);
final String provUrl = getSource().getInstance().stream().map(Instance::getUrl).flatMap(List::stream).findFirst().orElse(null);;
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
return new OpenAireEventPayload()
.setPublication(p)
.setHighlight(hl)
.setTrust(trust)
.setProvenance(provenance);
}
} }