From ac0da5a7eeb57686c3c493dde060de4ee9e396e3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 7 May 2020 12:31:26 +0200 Subject: [PATCH 01/10] Partial implementation of broker events --- .../eu/dnetlib/dhp/broker/model/Event.java | 101 ++++++++++++++ .../dhp/broker/model/EventFactory.java | 130 ++++++++++++++++++ .../broker/oa/GenerateEventsApplication.java | 106 ++++++++++++++ .../broker/oa/util/EnrichMissingAbstract.java | 30 ++++ .../oa/util/EnrichMissingAuthorOrcid.java | 30 ++++ .../oa/util/EnrichMissingOpenAccess.java | 31 +++++ .../dhp/broker/oa/util/EnrichMissingPid.java | 31 +++++ .../broker/oa/util/EnrichMissingProject.java | 31 +++++ .../oa/util/EnrichMissingPublicationDate.java | 30 ++++ .../broker/oa/util/EnrichMissingSubject.java | 35 +++++ .../broker/oa/util/EnrichMoreOpenAccess.java | 31 +++++ .../dhp/broker/oa/util/EnrichMorePid.java | 31 +++++ .../dhp/broker/oa/util/EnrichMoreSubject.java | 35 +++++ .../dhp/broker/oa/util/UpdateInfo.java | 35 +++++ 14 files changed, 687 insertions(+) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java create mode 100644 
/**
 * Mutable bean describing a single broker event.
 *
 * <p>All properties are exposed through plain getter/setter pairs; the class performs no
 * validation and makes no defensive copies of the values it is given.
 */
public class Event {

	/** Identifier of the event. */
	private String eventId;

	/** Identifier of the producer that generated the event. */
	private String producerId;

	/** Topic the event belongs to. */
	private String topic;

	/** Payload of the event, stored as a string. */
	private String payload;

	private Long creationDate;

	private Long expiryDate;

	private boolean instantMessage;

	/** Additional named properties attached to the event. */
	private Map<String, Object> map;

	/** Creates an empty event; every reference property is {@code null}. */
	public Event() {}

	/**
	 * Creates a fully populated event.
	 *
	 * @param producerId     identifier of the producer
	 * @param eventId        identifier of the event
	 * @param topic          topic of the event
	 * @param payload        payload of the event
	 * @param creationDate   creation date
	 * @param expiryDate     expiry date
	 * @param instantMessage value of the instant-message flag
	 * @param map            additional named properties (stored as given, not copied)
	 */
	public Event(final String producerId, final String eventId, final String topic, final String payload,
		final Long creationDate, final Long expiryDate,
		final boolean instantMessage,
		final Map<String, Object> map) {
		this.producerId = producerId;
		this.eventId = eventId;
		this.topic = topic;
		this.payload = payload;
		this.creationDate = creationDate;
		this.expiryDate = expiryDate;
		this.instantMessage = instantMessage;
		this.map = map;
	}

	public String getProducerId() {
		return this.producerId;
	}

	public void setProducerId(final String producerId) {
		this.producerId = producerId;
	}

	public String getEventId() {
		return this.eventId;
	}

	public void setEventId(final String eventId) {
		this.eventId = eventId;
	}

	public String getTopic() {
		return this.topic;
	}

	public void setTopic(final String topic) {
		this.topic = topic;
	}

	public String getPayload() {
		return this.payload;
	}

	public void setPayload(final String payload) {
		this.payload = payload;
	}

	public Long getCreationDate() {
		return this.creationDate;
	}

	public void setCreationDate(final Long creationDate) {
		this.creationDate = creationDate;
	}

	public Long getExpiryDate() {
		return this.expiryDate;
	}

	public void setExpiryDate(final Long expiryDate) {
		this.expiryDate = expiryDate;
	}

	public boolean isInstantMessage() {
		return this.instantMessage;
	}

	public void setInstantMessage(final boolean instantMessage) {
		this.instantMessage = instantMessage;
	}

	/**
	 * @return the additional properties; the same instance that was set, not a copy
	 */
	public Map<String, Object> getMap() {
		return this.map;
	}

	public void setMap(final Map<String, Object> map) {
		this.map = map;
	}
}
eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class EventFactory { + + private final static String PRODUCER_ID = "OpenAIRE"; + + private static final int TTH_DAYS = 365; + + private final static String[] DATE_PATTERNS = { + "yyyy-MM-dd" + }; + + public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo updateInfo) { + + final long now = new Date().getTime(); + + final Event res = new Event(); + + final Map map = createMapFromResult(target, source, updateInfo); + + final String payload = createPayload(target, updateInfo); + + final String eventId = + calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); + + res.setEventId(eventId); + res.setProducerId(PRODUCER_ID); + res.setPayload(payload); + res.setMap(map); + res.setTopic(updateInfo.getTopic()); + res.setCreationDate(now); + res.setExpiryDate(calculateExpiryDate(now)); + res.setInstantMessage(false); + return res; + } + + private static String createPayload(final Result result, final UpdateInfo updateInfo) { + final OpenAireEventPayload payload = new OpenAireEventPayload(); + // TODO + + updateInfo.compileHighlight(payload); + + return payload.toJSON(); + } + + private static Map createMapFromResult(final Result oaf, final Result source, final UpdateInfo updateInfo) { + final Map map = new HashMap<>(); + + final List collectedFrom = oaf.getCollectedfrom(); + if (collectedFrom.size() == 1) { + map.put("target_datasource_id", collectedFrom.get(0).getKey()); + map.put("target_datasource_name", collectedFrom.get(0).getValue()); + } + + final List ids = oaf.getOriginalId(); + if (ids.size() > 0) { + map.put("target_publication_id", ids.get(0)); + } + + final List titles = oaf.getTitle(); + if (titles.size() > 0) { + map.put("target_publication_title", titles.get(0)); + } + + final long date = parseDateTolong(oaf.getDateofacceptance().getValue()); + if (date > 0) { + 
map.put("target_dateofacceptance", date); + } + + final List subjects = oaf.getSubject(); + if (subjects.size() > 0) { + map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); + } + + final List authors = oaf.getAuthor(); + if (authors.size() > 0) { + map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList())); + } + + // PROVENANCE INFO + map.put("trust", updateInfo.getTrust()); + final List sourceCollectedFrom = source.getCollectedfrom(); + if (sourceCollectedFrom.size() == 1) { + map.put("provenance_datasource_id", sourceCollectedFrom.get(0).getKey()); + map.put("provenance_datasource_name", sourceCollectedFrom.get(0).getValue()); + } + map.put("provenance_publication_id_list", source.getOriginalId()); + + return map; + } + + private static String calculateEventId(final String topic, final String publicationId, final String value) { + return "event-" + + DigestUtils.md5Hex(topic).substring(0, 6) + "-" + + DigestUtils.md5Hex(publicationId).substring(0, 8) + "-" + + DigestUtils.md5Hex(value).substring(0, 8); + } + + private static long calculateExpiryDate(final long now) { + return now + TTH_DAYS * 24 * 60 * 60 * 1000; + } + + private static long parseDateTolong(final String date) { + if (StringUtils.isBlank(date)) { return -1; } + try { + return DateUtils.parseDate(date, DATE_PATTERNS).getTime(); + } catch (final ParseException e) { + return -1; + } + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java new file mode 100644 index 0000000000..7b0ed0882b --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -0,0 +1,106 @@ +package eu.dnetlib.dhp.broker.oa; + +import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.broker.model.Event; +import eu.dnetlib.dhp.broker.model.EventFactory; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingProject; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.util.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class GenerateEventsApplication { + + private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils.toString(GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + 
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String graphPath = parser.get("graphPath"); + log.info("graphPath: {}", graphPath); + + final String eventsPath = parser.get("eventsPath"); + log.info("eventsPath: {}", eventsPath); + + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, eventsPath); + generateEvents(spark, graphPath, eventsPath); + }); + + } + + private static void removeOutputDir(final SparkSession spark, final String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void generateEvents(final SparkSession spark, final String graphPath, final String eventsPath) { + // TODO + } + + private List generateEvents(final Result... children) { + final List list = new ArrayList<>(); + + for (final Result source : children) { + for (final Result target : children) { + if (source != target) { + list.addAll(findUpdates(source, target).stream() + .map(info -> EventFactory.newBrokerEvent(source, target, info)) + .collect(Collectors.toList())); + } + } + } + + return list; + } + + private List> findUpdates(final Result source, final Result target) { + final List> list = new ArrayList<>(); + list.addAll(EnrichMissingAbstract.findUpdates(source, target)); + list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target)); + list.addAll(EnrichMissingOpenAccess.findUpdates(source, target)); + list.addAll(EnrichMissingPid.findUpdates(source, target)); + list.addAll(EnrichMissingProject.findUpdates(source, target)); + list.addAll(EnrichMissingPublicationDate.findUpdates(source, target)); + list.addAll(EnrichMissingSubject.findUpdates(source, target)); + list.addAll(EnrichMoreOpenAccess.findUpdates(source, target)); + list.addAll(EnrichMorePid.findUpdates(source, target)); + list.addAll(EnrichMoreSubject.findUpdates(source, target)); + return list; + } + +} diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java new file mode 100644 index 0000000000..5821adf1ec --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java @@ -0,0 +1,30 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingAbstract extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingAbstract(final String highlightValue, final float trust) { + super("ENRICH/MISSING/ABSTRACT", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getAbstracts().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java new file mode 100644 index 0000000000..6a8f36ed8a --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java @@ -0,0 +1,30 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingAuthorOrcid extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return 
Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) { + super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + // TODO + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java new file mode 100644 index 0000000000..cd05b18a06 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.Instance; +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingOpenAccess extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) { + super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getInstances().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue().getUrl(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java new file mode 100644 index 
0000000000..1bab541881 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPid extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingPid(final Pid highlightValue, final float trust) { + super("ENRICH/MISSING/PID", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getPids().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue().getType() + "::" + getHighlightValue().getValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java new file mode 100644 index 0000000000..368c2babe1 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.broker.objects.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingProject extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingProject(final Project highlightValue, 
final float trust) { + super("ENRICH/MISSING/PROJECT", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getProjects().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java new file mode 100644 index 0000000000..abce480e34 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java @@ -0,0 +1,30 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationDate extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMissingPublicationDate(final String highlightValue, final float trust) { + super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().setPublicationdate(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java new file mode 100644 index 0000000000..6533d84873 --- /dev/null +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingSubject extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // MESHEUROPMC + // ARXIV + // JEL + // DDC + // ACM + + return Arrays.asList(); + } + + private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) { + super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getSubjects().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java new file mode 100644 index 0000000000..9b79d58284 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.Instance; +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreOpenAccess extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) { + 
super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getInstances().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue().getUrl(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java new file mode 100644 index 0000000000..37b9cfeb8a --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMorePid extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); + } + + private EnrichMorePid(final Pid highlightValue, final float trust) { + super("ENRICH/MORE/PID", highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getPids().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue().getType() + "::" + getHighlightValue().getValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java new file mode 100644 index 0000000000..84e2bbe5ee --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java @@ -0,0 +1,35 
@@ +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreSubject extends UpdateInfo { + + public static List findUpdates(final Result source, final Result target) { + // MESHEUROPMC + // ARXIV + // JEL + // DDC + // ACM + + return Arrays.asList(); + } + + private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) { + super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust); + } + + @Override + public void compileHighlight(final OpenAireEventPayload payload) { + payload.getHighlight().getSubjects().add(getHighlightValue()); + } + + @Override + public String getHighlightValueAsString() { + return getHighlightValue(); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java new file mode 100644 index 0000000000..73f3f2f5ab --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.broker.oa.util; + +import eu.dnetlib.broker.objects.OpenAireEventPayload; + +public abstract class UpdateInfo { + + private final String topic; + + private final T highlightValue; + + private final float trust; + + protected UpdateInfo(final String topic, final T highlightValue, final float trust) { + this.topic = topic; + this.highlightValue = highlightValue; + this.trust = trust; + } + + public T getHighlightValue() { + return highlightValue; + } + + public float getTrust() { + return trust; + } + + public String getTopic() { + return topic; + } + + abstract public void compileHighlight(OpenAireEventPayload payload); + + abstract public String getHighlightValueAsString(); + +} From 
8c67073a071091819233ed2aece4e872b24af6fa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 09:42:21 +0200 Subject: [PATCH 02/10] force speculative execution to false --- .../eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 923f6de69b..298ac75892 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -405,6 +405,9 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false --inputPath${workingDir}/xml --isLookupUrl ${isLookupUrl} From 62ea19f1d394965b210ad1f7a68853c981e174aa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 09:43:26 +0200 Subject: [PATCH 03/10] introduced mapping for ExternalReferences, made urls defined within an instance unique --- .../dhp/schema/common/ModelConstants.java | 3 ++ .../migration/ProtoConverter.java | 44 +++++++++++++++---- .../dhp/oa/graph/raw/OafToOafMapper.java | 23 +++++----- .../dhp/oa/graph/raw/OdfToOafMapper.java | 15 +++---- 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 926b021103..accc06d122 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -49,6 +49,9 @@ public class ModelConstants { public static final String HAS_PARTICIPANT = "hasParticipant"; public static final String IS_PARTICIPANT = "isParticipant"; + public static final String UNKNOWN = "UNKNOWN"; + public static final String NOT_AVAILABLE = "not available"; + public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier( PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID, DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java index 456113c438..90d573ac07 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java @@ -1,12 +1,10 @@ package eu.dnetlib.dhp.actionmanager.migration; -import static eu.dnetlib.data.proto.KindProtos.Kind.entity; -import static eu.dnetlib.data.proto.KindProtos.Kind.relation; -import static eu.dnetlib.data.proto.TypeProtos.*; -import static eu.dnetlib.data.proto.TypeProtos.Type.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.io.Serializable; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*; public class ProtoConverter implements Serializable { - public static final String UNKNOWN = "UNKNOWN"; - public static final String NOT_AVAILABLE = "not available"; - public static final String DNET_ACCESS_MODES = "dnet:access_modes"; - public static Oaf convert(OafProtos.Oaf oaf) { try { switch (oaf.getKind()) { @@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable { case result: final 
Result r = convertResult(oaf); r.setInstance(convertInstances(oaf)); + r.setExternalReference(convertExternalRefs(oaf)); return r; case project: return convertProject(oaf); @@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable { i.setHostedby(mapKV(ri.getHostedby())); i.setInstancetype(mapQualifier(ri.getInstancetype())); i.setLicense(mapStringField(ri.getLicense())); - i.setUrl(ri.getUrlList()); + i + .setUrl( + ri.getUrlList() != null ? ri + .getUrlList() + .stream() + .distinct() + .collect(Collectors.toCollection(ArrayList::new)) : null); i.setRefereed(mapStringField(ri.getRefereed())); i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount())); i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency())); return i; } + private static List convertExternalRefs(OafProtos.Oaf oaf) { + ResultProtos.Result r = oaf.getEntity().getResult(); + if (r.getExternalReferenceCount() > 0) { + return r + .getExternalReferenceList() + .stream() + .map(e -> convertExtRef(e)) + .collect(Collectors.toList()); + } + return Lists.newArrayList(); + } + + private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) { + ExternalReference ex = new ExternalReference(); + ex.setUrl(e.getUrl()); + ex.setSitename(e.getSitename()); + ex.setRefidentifier(e.getRefidentifier()); + ex.setQuery(e.getQuery()); + ex.setQualifier(mapQualifier(e.getQualifier())); + ex.setLabel(e.getLabel()); + ex.setDescription(e.getDescription()); + ex.setDataInfo(ex.getDataInfo()); + return ex; + } + private static Organization convertOrganization(OafProtos.Oaf oaf) { final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata(); final Organization org = setOaf(new Organization(), oaf); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 
08a89cb228..891fee57e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .setProcessingchargecurrency( field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); - for (final Object o : doc.selectNodes("//dc:identifier")) { - final String url = ((Node) o).getText().trim(); - if (url.startsWith("http")) { - instance.setUrl(Arrays.asList(url)); - } - } + List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); + instance + .setUrl( + nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); + return Lists.newArrayList(instance); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 92a37c0675..04984d0086 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static 
eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final KeyValue hostedby) { final Instance instance = new Instance(); + final Set url = new HashSet<>(); instance.setUrl(new ArrayList<>()); instance .setInstancetype( @@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { - instance.getUrl().add(((Node) o).getText().trim()); + url.add(((Node) o).getText().trim()); } for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { - instance.getUrl().add(((Node) o).getText().trim()); + url.add(((Node) o).getText().trim()); } for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { - instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); + url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { - instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); + url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } + instance.getUrl().addAll(url); return Arrays.asList(instance); } From 3420998bb4dd4e6ecaa67cfcc9be1c2411a27a5a Mon Sep 17 00:00:00 2001 From: miconis Date: Fri, 8 May 2020 15:43:30 +0200 Subject: [PATCH 04/10] reltype set in mergerels --- .../java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index a446508233..3d0f28db54 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -137,10 +137,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { } private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { - Relation r = new Relation(); + + String entityType = dedupConf.getWf().getEntityType(); + + Relation r = new Relation(); r.setSource(source); r.setTarget(target); r.setRelClass(relClass); + r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)); r.setSubRelType("dedup"); DataInfo info = new DataInfo(); From 8fd1952f16fd281ce815da305dd1fcd4a135401c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 16:01:09 +0200 Subject: [PATCH 05/10] code formatting --- .../actionmanager/migration/MigrateActionSet.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java index 89cb63fabf..8143b81e5a 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java @@ -82,10 +82,9 @@ public class MigrateActionSet { List targetPaths = new ArrayList<>(); final List sourcePaths = getSourcePaths(sourceNN, isLookUp); - log - .info( - "paths to process:\n{}", - sourcePaths.stream().map(p -> 
p.toString()).collect(Collectors.joining("\n"))); + log.info("paths to process:\n{}", sourcePaths + .stream().map(p -> p.toString()).collect(Collectors.joining("\n"))); + for (Path source : sourcePaths) { if (!sourceFS.exists(source)) { @@ -119,9 +118,8 @@ public class MigrateActionSet { } } - props - .setProperty( - TARGET_PATHS, targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(","))); + final String targetPathsCsv = targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")); + props.setProperty(TARGET_PATHS, targetPathsCsv); File file = new File(System.getProperty("oozie.action.output.properties")); try (OutputStream os = new FileOutputStream(file)) { From 5b28bb4131c43899bd0b507321d6f611fa89cd24 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 16:49:47 +0200 Subject: [PATCH 06/10] code formatting --- .../migration/MigrateActionSet.java | 8 +++++-- .../eu/dnetlib/dhp/broker/model/Event.java | 7 ++++-- .../dhp/broker/model/EventFactory.java | 22 ++++++++++++++----- .../broker/oa/GenerateEventsApplication.java | 16 +++++++++----- .../broker/oa/util/EnrichMissingAbstract.java | 1 + .../oa/util/EnrichMissingAuthorOrcid.java | 1 + .../oa/util/EnrichMissingOpenAccess.java | 1 + .../dhp/broker/oa/util/EnrichMissingPid.java | 1 + .../broker/oa/util/EnrichMissingProject.java | 4 +++- .../oa/util/EnrichMissingPublicationDate.java | 1 + .../broker/oa/util/EnrichMissingSubject.java | 1 + .../broker/oa/util/EnrichMoreOpenAccess.java | 1 + .../dhp/broker/oa/util/EnrichMorePid.java | 1 + .../dhp/broker/oa/util/EnrichMoreSubject.java | 1 + .../dhp/broker/oa/util/UpdateInfo.java | 1 + .../dhp/oa/dedup/SparkCreateMergeRels.java | 4 ++-- 16 files changed, 53 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java index 
8143b81e5a..77be7652e2 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java @@ -82,8 +82,12 @@ public class MigrateActionSet { List targetPaths = new ArrayList<>(); final List sourcePaths = getSourcePaths(sourceNN, isLookUp); - log.info("paths to process:\n{}", sourcePaths - .stream().map(p -> p.toString()).collect(Collectors.joining("\n"))); + log + .info( + "paths to process:\n{}", sourcePaths + .stream() + .map(p -> p.toString()) + .collect(Collectors.joining("\n"))); for (Path source : sourcePaths) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java index e9df9260ca..0512a38134 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.model; import java.util.Map; @@ -20,9 +21,11 @@ public class Event { private Map map; - public Event() {} + public Event() { + } - public Event(final String producerId, final String eventId, final String topic, final String payload, final Long creationDate, final Long expiryDate, + public Event(final String producerId, final String eventId, final String topic, final String payload, + final Long creationDate, final Long expiryDate, final boolean instantMessage, final Map map) { this.producerId = producerId; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 6bee65eb07..0694556b2e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.model; import java.text.ParseException; @@ -38,8 +39,8 @@ public class EventFactory { final String payload = createPayload(target, updateInfo); - final String eventId = - calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); + final String eventId = calculateEventId( + updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); res.setEventId(eventId); res.setProducerId(PRODUCER_ID); @@ -61,7 +62,8 @@ public class EventFactory { return payload.toJSON(); } - private static Map createMapFromResult(final Result oaf, final Result source, final UpdateInfo updateInfo) { + private static Map createMapFromResult(final Result oaf, final Result source, + final UpdateInfo updateInfo) { final Map map = new HashMap<>(); final List collectedFrom = oaf.getCollectedfrom(); @@ -87,12 +89,18 @@ public class EventFactory { final List subjects = oaf.getSubject(); if (subjects.size() > 0) { - map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); + map + .put( + "target_publication_subject_list", + subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); } final List authors = oaf.getAuthor(); if (authors.size() > 0) { - map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList())); + map + .put( + "target_publication_author_list", + authors.stream().map(Author::getFullname).collect(Collectors.toList())); } // PROVENANCE INFO @@ -119,7 +127,9 @@ public class EventFactory { } private static long parseDateTolong(final String date) { - if (StringUtils.isBlank(date)) { return -1; } + if (StringUtils.isBlank(date)) { + return -1; + } try { return DateUtils.parseDate(date, DATE_PATTERNS).getTime(); } catch (final 
ParseException e) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 7b0ed0882b..54d4ef36aa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -40,8 +41,10 @@ public class GenerateEventsApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils.toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + IOUtils + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -78,9 +81,12 @@ public class GenerateEventsApplication { for (final Result source : children) { for (final Result target : children) { if (source != target) { - list.addAll(findUpdates(source, target).stream() - .map(info -> EventFactory.newBrokerEvent(source, target, info)) - .collect(Collectors.toList())); + list + .addAll( + findUpdates(source, target) + .stream() + .map(info -> EventFactory.newBrokerEvent(source, target, info)) + .collect(Collectors.toList())); } } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java index 5821adf1ec..493d1f97c7 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java index 6a8f36ed8a..6899c62a37 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java index cd05b18a06..9464130f31 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java index 1bab541881..293d4993f3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java index 368c2babe1..a22c179a20 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; @@ -25,7 +26,8 @@ public class EnrichMissingProject extends UpdateInfo { @Override public String getHighlightValueAsString() { - return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode(); + return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + + getHighlightValue().getCode(); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java index abce480e34..869dca2645 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java index 6533d84873..a2ed5d0439 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java @@ -1,3 +1,4 @@ + 
package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java index 9b79d58284..4f1e88d3d5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java index 37b9cfeb8a..ecf2cf3107 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java index 84e2bbe5ee..f29b86292d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 73f3f2f5ab..f7b6b69e9e 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import eu.dnetlib.broker.objects.OpenAireEventPayload; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 3d0f28db54..c0503d991d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -138,9 +138,9 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { - String entityType = dedupConf.getWf().getEntityType(); + String entityType = dedupConf.getWf().getEntityType(); - Relation r = new Relation(); + Relation r = new Relation(); r.setSource(source); r.setTarget(target); r.setRelClass(relClass); From 6e47c724c6fdd9ed34c510af4e2e69ceb3e0dbc3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 17:01:27 +0200 Subject: [PATCH 07/10] [maven-release-plugin] prepare release dhp-1.1.7 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 6 ++---- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-schemas/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 5 ++--- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml 
| 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-worfklow-profiles/pom.xml | 6 ++---- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 19 files changed, 23 insertions(+), 28 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 9b03536ddd..8141a050eb 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.1.7-SNAPSHOT + 1.1.7 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 4d40edd997..fef761b1f5 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.1.7-SNAPSHOT + 1.1.7 dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 5e896e7a5c..c3a0621736 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -1,13 +1,11 @@ - + 4.0.0 eu.dnetlib.dhp dhp-code-style - 1.1.7-SNAPSHOT + 1.1.7 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 041641fcfb..e9680f9f3b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.1.7-SNAPSHOT + 1.1.7 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 51af8d954b..7a91721193 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.1.7-SNAPSHOT + 1.1.7 ../ diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 4a123cedad..47b49a5c1f 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.1.7-SNAPSHOT + 1.1.7 ../ diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 22ca7504d5..92afe32da3 
100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -1,11 +1,10 @@ - + 4.0.0 eu.dnetlib.dhp dhp-workflows - 1.1.7-SNAPSHOT + 1.1.7 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 3e7b1a3750..8a06f575f5 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.1.7-SNAPSHOT + 1.1.7 dhp-aggregation diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index a57c4ba25f..fadd995e09 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index e7f2a926f4..690bbec885 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index e87811cd5d..abb2c225fc 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index d013dd1d98..4645c74862 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index d25446bbc3..a6b3b53a36 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 
1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index 03604f4319..9c41703160 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index baac163d27..efa6b92a0b 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 0f5e180824..058d2865a3 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index bad72a9ef0..45ab9ff421 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.1.7-SNAPSHOT + 1.1.7 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index ea34339032..7288f27af0 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.1.7-SNAPSHOT + 1.1.7 ../ diff --git a/pom.xml b/pom.xml index 4838732190..a35c8c7100 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.1.7-SNAPSHOT + 1.1.7 pom @@ -38,7 +38,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.1.7 This module is the root descriptor for the dnet-hadoop project From 
0ccc864ad9c9a11ca9ec5b6358c16354db43bb4a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 17:01:31 +0200 Subject: [PATCH 08/10] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-schemas/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-worfklow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 19 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 8141a050eb..92a939d195 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.1.7 + 1.1.8-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index fef761b1f5..551f041542 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.1.7 + 1.1.8-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index c3a0621736..4bf8ef4b6f 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 
+5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.1.7 + 1.1.8-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index e9680f9f3b..44c0763d63 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.1.7 + 1.1.8-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7a91721193..388998b945 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.1.7 + 1.1.8-SNAPSHOT ../ diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 47b49a5c1f..fab26b3848 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.1.7 + 1.1.8-SNAPSHOT ../ diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 92afe32da3..2327b97e26 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.1.7 + 1.1.8-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 8a06f575f5..19237a5a81 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.1.7 + 1.1.8-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index fadd995e09..c94c79f2cb 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 690bbec885..de39401b97 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml 
b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index abb2c225fc..38f630451e 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 4645c74862..456e4dfa39 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index a6b3b53a36..9c407d2319 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index 9c41703160..65218318a7 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index efa6b92a0b..291082df71 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 058d2865a3..29015f9204 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index 45ab9ff421..b305b5b8c3 100644 --- 
a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.1.7 + 1.1.8-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 7288f27af0..4c37bcf684 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.1.7 + 1.1.8-SNAPSHOT ../ diff --git a/pom.xml b/pom.xml index a35c8c7100..2071519060 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.1.7 + 1.1.8-SNAPSHOT pom @@ -38,7 +38,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.1.7 + HEAD This module is the root descriptor for the dnet-hadoop project From fd519df616bcf96df3c8a400a4fe107477271cef Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 May 2020 19:00:38 +0200 Subject: [PATCH 09/10] new rels produced by dedup workflow must be unique --- .../dhp/oa/dedup/SparkPropagateRelation.java | 3 ++- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 18 ++++++++++++++++++ .../eu/dnetlib/dhp/dedup/test/relation_1.json | 12 ++++++++++++ .../eu/dnetlib/dhp/dedup/test/relation_2.json | 10 ++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 2d18c9a612..5168085116 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -86,7 +86,8 @@ public class SparkPropagateRelation extends AbstractSparkAction { mergedIds, FieldType.TARGET, getFixRelFn(FieldType.TARGET)) - .filter(SparkPropagateRelation::containsDedup); + .filter(SparkPropagateRelation::containsDedup) + .distinct(); Dataset updated = processDataset( processDataset(rels, mergedIds, FieldType.SOURCE, getDeletedFn()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 990ac04c03..2321762899 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -12,12 +12,14 @@ import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -450,6 +452,22 @@ public class SparkDedupTest implements Serializable { assertEquals(updated, deletedbyinference); } + @Test + @Order(6) + public void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int expected_total, int expected_unique) { + Dataset rel = spark.read() + .textFile(getClass().getResource(path).getPath()) + 
.map((MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + @AfterAll public static void finalCleanUp() throws IOException { FileUtils.deleteDirectory(new File(testOutputBasePath)); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json new file mode 100644 index 0000000000..c0cf8b6956 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json @@ -0,0 +1,12 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::40c7b1dfa18c3693d374dafd21ef852f","subRelType":"provision","target":"10|doajarticles::618df40624078491acfd93ca3ff6921c"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::0b4e756a73338f60b84de98d080f6422","subRelType":"provision","target":"10|doajarticles::6d01e689db13b6977b411f4170b6143b"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::fe2f7c9d350b9c5aa658ec384d761e33","subRelType":"provision","target":"10|doajarticles::9b8a956b0703854ba79e52ddf7dc552e"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::a116734108ba011ef715b012f095e3f5","subRelType":"provision","target":"10|doajarticles::c5de04b1a35da2cc4468e299bc9ffa16"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::8b83abbbcad5496fe43cda88d0045aa4","subRelType":"provision","target":"10|opendoar____::6855456e2fe46a9d49d3d3af4f57443d"} 
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::88034de0247d9d36e22783e9319c5ba3","subRelType":"provision","target":"10|opendoar____::c17028c9b6e0c5deaad29665d582284a"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::dfb21c796f33e9acf505cc960a3d8d2c","subRelType":"provision","target":"10|opendoar____::dfa037a53e121ecc9e0926800c3e814e"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::b526b1aa1562038881a31be59896985f","subRelType":"provision","target":"10|re3data_____::2e457773b62df3534cc04441bf406a70"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research 
Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::6b306183bc051b5aaa5376f2fab6e6e5","subRelType":"provision","target":"10|re3data_____::6371ff9ee1ec7073416cb83c868b10a3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json new file mode 100644 index 0000000000..00db9715b0 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json @@ -0,0 +1,10 @@ +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} 
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} 
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} 
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"} +{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681629"} \ No newline at end of file From ae0f535c73a7f8086a16ade8f5e24fd60f3dda12 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Sat, 9 May 2020 22:34:48 +0300 Subject: [PATCH 10/10] Fixing hardcoded reference to main openAIRE graph db --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql index d4ca2e10ec..461f48bfce 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql @@ -1,2 +1,2 @@ DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages; -CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) as id, langs.languages as language from openaire.datasource d LATERAL VIEW explode(d.odlanguages.value) langs as languages; +CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) as id, langs.languages as language from ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs as languages;