diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
index 9b03536dd..e35e1d515 100644
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
dhp-build-assembly-resources
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
index 4d40edd99..e16184c27 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
dhp-build-properties-maven-plugin
diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index 5e896e7a5..72ccbba3c 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -1,13 +1,11 @@
-
+
4.0.0
eu.dnetlib.dhp
dhp-code-style
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
jar
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index 041641fcf..df9bafabd 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
dhp-build
pom
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index f1470a60c..a2d5ce997 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
../
diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml
index 4a123ceda..343518a51 100644
--- a/dhp-schemas/pom.xml
+++ b/dhp-schemas/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
../
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index 926b02110..accc06d12 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -49,6 +49,9 @@ public class ModelConstants {
public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant";
+ public static final String UNKNOWN = "UNKNOWN";
+ public static final String NOT_AVAILABLE = "not available";
+
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
index ad5e9cebe..c928992aa 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
@@ -41,6 +41,12 @@ public class Relation extends Oaf {
*/
private String target;
+ /**
+ * List of relation specific properties. Values include 'similarityLevel', indicating the similarity score between a
+ * pair of publications.
+ */
+ private List properties = new ArrayList<>();
+
public String getRelType() {
return relType;
}
@@ -81,6 +87,14 @@ public class Relation extends Oaf {
this.target = target;
}
+ public List getProperties() {
+ return properties;
+ }
+
+ public void setProperties(List properties) {
+ this.properties = properties;
+ }
+
public void mergeFrom(final Relation r) {
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml
index 22ca7504d..52eb31b3d 100644
--- a/dhp-workflows/dhp-actionmanager/pom.xml
+++ b/dhp-workflows/dhp-actionmanager/pom.xml
@@ -1,11 +1,10 @@
-
+
4.0.0
eu.dnetlib.dhp
dhp-workflows
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
dhp-actionmanager
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
index 89cb63fab..77be7652e 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/MigrateActionSet.java
@@ -84,8 +84,11 @@ public class MigrateActionSet {
final List sourcePaths = getSourcePaths(sourceNN, isLookUp);
log
.info(
- "paths to process:\n{}",
- sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n")));
+ "paths to process:\n{}", sourcePaths
+ .stream()
+ .map(p -> p.toString())
+ .collect(Collectors.joining("\n")));
+
for (Path source : sourcePaths) {
if (!sourceFS.exists(source)) {
@@ -119,9 +122,8 @@ public class MigrateActionSet {
}
}
- props
- .setProperty(
- TARGET_PATHS, targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")));
+ final String targetPathsCsv = targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(","));
+ props.setProperty(TARGET_PATHS, targetPathsCsv);
File file = new File(System.getProperty("oozie.action.output.properties"));
try (OutputStream os = new FileOutputStream(file)) {
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
index 456113c43..90d573ac0 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java
@@ -1,12 +1,10 @@
package eu.dnetlib.dhp.actionmanager.migration;
-import static eu.dnetlib.data.proto.KindProtos.Kind.entity;
-import static eu.dnetlib.data.proto.KindProtos.Kind.relation;
-import static eu.dnetlib.data.proto.TypeProtos.*;
-import static eu.dnetlib.data.proto.TypeProtos.Type.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class ProtoConverter implements Serializable {
- public static final String UNKNOWN = "UNKNOWN";
- public static final String NOT_AVAILABLE = "not available";
- public static final String DNET_ACCESS_MODES = "dnet:access_modes";
-
public static Oaf convert(OafProtos.Oaf oaf) {
try {
switch (oaf.getKind()) {
@@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable {
case result:
final Result r = convertResult(oaf);
r.setInstance(convertInstances(oaf));
+ r.setExternalReference(convertExternalRefs(oaf));
return r;
case project:
return convertProject(oaf);
@@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable {
i.setHostedby(mapKV(ri.getHostedby()));
i.setInstancetype(mapQualifier(ri.getInstancetype()));
i.setLicense(mapStringField(ri.getLicense()));
- i.setUrl(ri.getUrlList());
+ i
+ .setUrl(
+ ri.getUrlList() != null ? ri
+ .getUrlList()
+ .stream()
+ .distinct()
+ .collect(Collectors.toCollection(ArrayList::new)) : null);
i.setRefereed(mapStringField(ri.getRefereed()));
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
return i;
}
+ private static List convertExternalRefs(OafProtos.Oaf oaf) {
+ ResultProtos.Result r = oaf.getEntity().getResult();
+ if (r.getExternalReferenceCount() > 0) {
+ return r
+ .getExternalReferenceList()
+ .stream()
+ .map(e -> convertExtRef(e))
+ .collect(Collectors.toList());
+ }
+ return Lists.newArrayList();
+ }
+
+ private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
+ ExternalReference ex = new ExternalReference();
+ ex.setUrl(e.getUrl());
+ ex.setSitename(e.getSitename());
+ ex.setRefidentifier(e.getRefidentifier());
+ ex.setQuery(e.getQuery());
+ ex.setQualifier(mapQualifier(e.getQualifier()));
+ ex.setLabel(e.getLabel());
+ ex.setDescription(e.getDescription());
+ ex.setDataInfo(ex.getDataInfo());
+ return ex;
+ }
+
private static Organization convertOrganization(OafProtos.Oaf oaf) {
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf);
diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml
index 3e7b1a375..2e3904055 100644
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp-workflows
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
dhp-aggregation
diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml
index a57c4ba25..586de5bbc 100644
--- a/dhp-workflows/dhp-broker-events/pom.xml
+++ b/dhp-workflows/dhp-broker-events/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java
new file mode 100644
index 000000000..0512a3813
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java
@@ -0,0 +1,104 @@
+
+package eu.dnetlib.dhp.broker.model;
+
+import java.util.Map;
+
+public class Event {
+
+ private String eventId;
+
+ private String producerId;
+
+ private String topic;
+
+ private String payload;
+
+ private Long creationDate;
+
+ private Long expiryDate;
+
+ private boolean instantMessage;
+
+ private Map map;
+
+ public Event() {
+ }
+
+ public Event(final String producerId, final String eventId, final String topic, final String payload,
+ final Long creationDate, final Long expiryDate,
+ final boolean instantMessage,
+ final Map map) {
+ this.producerId = producerId;
+ this.eventId = eventId;
+ this.topic = topic;
+ this.payload = payload;
+ this.creationDate = creationDate;
+ this.expiryDate = expiryDate;
+ this.instantMessage = instantMessage;
+ this.map = map;
+ }
+
+ public String getProducerId() {
+ return this.producerId;
+ }
+
+ public void setProducerId(final String producerId) {
+ this.producerId = producerId;
+ }
+
+ public String getEventId() {
+ return this.eventId;
+ }
+
+ public void setEventId(final String eventId) {
+ this.eventId = eventId;
+ }
+
+ public String getTopic() {
+ return this.topic;
+ }
+
+ public void setTopic(final String topic) {
+ this.topic = topic;
+ }
+
+ public String getPayload() {
+ return this.payload;
+ }
+
+ public void setPayload(final String payload) {
+ this.payload = payload;
+ }
+
+ public Long getCreationDate() {
+ return this.creationDate;
+ }
+
+ public void setCreationDate(final Long creationDate) {
+ this.creationDate = creationDate;
+ }
+
+ public Long getExpiryDate() {
+ return this.expiryDate;
+ }
+
+ public void setExpiryDate(final Long expiryDate) {
+ this.expiryDate = expiryDate;
+ }
+
+ public boolean isInstantMessage() {
+ return this.instantMessage;
+ }
+
+ public void setInstantMessage(final boolean instantMessage) {
+ this.instantMessage = instantMessage;
+ }
+
+ public Map getMap() {
+ return this.map;
+ }
+
+ public void setMap(final Map map) {
+ this.map = map;
+ }
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java
new file mode 100644
index 000000000..0694556b2
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java
@@ -0,0 +1,140 @@
+
+package eu.dnetlib.dhp.broker.model;
+
+import java.text.ParseException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.time.DateUtils;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class EventFactory {
+
+ private final static String PRODUCER_ID = "OpenAIRE";
+
+ private static final int TTH_DAYS = 365;
+
+ private final static String[] DATE_PATTERNS = {
+ "yyyy-MM-dd"
+ };
+
+ public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo> updateInfo) {
+
+ final long now = new Date().getTime();
+
+ final Event res = new Event();
+
+ final Map map = createMapFromResult(target, source, updateInfo);
+
+ final String payload = createPayload(target, updateInfo);
+
+ final String eventId = calculateEventId(
+ updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
+
+ res.setEventId(eventId);
+ res.setProducerId(PRODUCER_ID);
+ res.setPayload(payload);
+ res.setMap(map);
+ res.setTopic(updateInfo.getTopic());
+ res.setCreationDate(now);
+ res.setExpiryDate(calculateExpiryDate(now));
+ res.setInstantMessage(false);
+ return res;
+ }
+
+ private static String createPayload(final Result result, final UpdateInfo> updateInfo) {
+ final OpenAireEventPayload payload = new OpenAireEventPayload();
+ // TODO
+
+ updateInfo.compileHighlight(payload);
+
+ return payload.toJSON();
+ }
+
+ private static Map createMapFromResult(final Result oaf, final Result source,
+ final UpdateInfo> updateInfo) {
+ final Map map = new HashMap<>();
+
+ final List collectedFrom = oaf.getCollectedfrom();
+ if (collectedFrom.size() == 1) {
+ map.put("target_datasource_id", collectedFrom.get(0).getKey());
+ map.put("target_datasource_name", collectedFrom.get(0).getValue());
+ }
+
+ final List ids = oaf.getOriginalId();
+ if (ids.size() > 0) {
+ map.put("target_publication_id", ids.get(0));
+ }
+
+ final List titles = oaf.getTitle();
+ if (titles.size() > 0) {
+ map.put("target_publication_title", titles.get(0));
+ }
+
+ final long date = parseDateTolong(oaf.getDateofacceptance().getValue());
+ if (date > 0) {
+ map.put("target_dateofacceptance", date);
+ }
+
+ final List subjects = oaf.getSubject();
+ if (subjects.size() > 0) {
+ map
+ .put(
+ "target_publication_subject_list",
+ subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
+ }
+
+ final List authors = oaf.getAuthor();
+ if (authors.size() > 0) {
+ map
+ .put(
+ "target_publication_author_list",
+ authors.stream().map(Author::getFullname).collect(Collectors.toList()));
+ }
+
+ // PROVENANCE INFO
+ map.put("trust", updateInfo.getTrust());
+ final List sourceCollectedFrom = source.getCollectedfrom();
+ if (sourceCollectedFrom.size() == 1) {
+ map.put("provenance_datasource_id", sourceCollectedFrom.get(0).getKey());
+ map.put("provenance_datasource_name", sourceCollectedFrom.get(0).getValue());
+ }
+ map.put("provenance_publication_id_list", source.getOriginalId());
+
+ return map;
+ }
+
+ private static String calculateEventId(final String topic, final String publicationId, final String value) {
+ return "event-"
+ + DigestUtils.md5Hex(topic).substring(0, 6) + "-"
+ + DigestUtils.md5Hex(publicationId).substring(0, 8) + "-"
+ + DigestUtils.md5Hex(value).substring(0, 8);
+ }
+
+ private static long calculateExpiryDate(final long now) {
+ return now + TTH_DAYS * 24 * 60 * 60 * 1000;
+ }
+
+ private static long parseDateTolong(final String date) {
+ if (StringUtils.isBlank(date)) {
+ return -1;
+ }
+ try {
+ return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
+ } catch (final ParseException e) {
+ return -1;
+ }
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
new file mode 100644
index 000000000..54d4ef36a
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
@@ -0,0 +1,112 @@
+
+package eu.dnetlib.dhp.broker.oa;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.broker.model.Event;
+import eu.dnetlib.dhp.broker.model.EventFactory;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAbstract;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAuthorOrcid;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingOpenAccess;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPid;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingProject;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPublicationDate;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMissingSubject;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid;
+import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject;
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class GenerateEventsApplication {
+
+ private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static void main(final String[] args) throws Exception {
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(
+ GenerateEventsApplication.class
+ .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
+ parser.parseArgument(args);
+
+ final Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String graphPath = parser.get("graphPath");
+ log.info("graphPath: {}", graphPath);
+
+ final String eventsPath = parser.get("eventsPath");
+ log.info("eventsPath: {}", eventsPath);
+
+ final SparkConf conf = new SparkConf();
+ runWithSparkSession(conf, isSparkSessionManaged, spark -> {
+ removeOutputDir(spark, eventsPath);
+ generateEvents(spark, graphPath, eventsPath);
+ });
+
+ }
+
+ private static void removeOutputDir(final SparkSession spark, final String path) {
+ HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+ }
+
+ private static void generateEvents(final SparkSession spark, final String graphPath, final String eventsPath) {
+ // TODO
+ }
+
+ private List generateEvents(final Result... children) {
+ final List list = new ArrayList<>();
+
+ for (final Result source : children) {
+ for (final Result target : children) {
+ if (source != target) {
+ list
+ .addAll(
+ findUpdates(source, target)
+ .stream()
+ .map(info -> EventFactory.newBrokerEvent(source, target, info))
+ .collect(Collectors.toList()));
+ }
+ }
+ }
+
+ return list;
+ }
+
+ private List> findUpdates(final Result source, final Result target) {
+ final List> list = new ArrayList<>();
+ list.addAll(EnrichMissingAbstract.findUpdates(source, target));
+ list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target));
+ list.addAll(EnrichMissingOpenAccess.findUpdates(source, target));
+ list.addAll(EnrichMissingPid.findUpdates(source, target));
+ list.addAll(EnrichMissingProject.findUpdates(source, target));
+ list.addAll(EnrichMissingPublicationDate.findUpdates(source, target));
+ list.addAll(EnrichMissingSubject.findUpdates(source, target));
+ list.addAll(EnrichMoreOpenAccess.findUpdates(source, target));
+ list.addAll(EnrichMorePid.findUpdates(source, target));
+ list.addAll(EnrichMoreSubject.findUpdates(source, target));
+ return list;
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java
new file mode 100644
index 000000000..493d1f97c
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java
@@ -0,0 +1,31 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingAbstract extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingAbstract(final String highlightValue, final float trust) {
+ super("ENRICH/MISSING/ABSTRACT", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getAbstracts().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java
new file mode 100644
index 000000000..6899c62a3
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java
@@ -0,0 +1,31 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingAuthorOrcid extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) {
+ super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ // TODO
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java
new file mode 100644
index 000000000..9464130f3
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java
@@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.Instance;
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingOpenAccess extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) {
+ super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getInstances().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue().getUrl();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java
new file mode 100644
index 000000000..293d4993f
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java
@@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.broker.objects.Pid;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingPid extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingPid(final Pid highlightValue, final float trust) {
+ super("ENRICH/MISSING/PID", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getPids().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java
new file mode 100644
index 000000000..a22c179a2
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java
@@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.broker.objects.Project;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingProject extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingProject(final Project highlightValue, final float trust) {
+ super("ENRICH/MISSING/PROJECT", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getProjects().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
+ + getHighlightValue().getCode();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java
new file mode 100644
index 000000000..869dca264
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java
@@ -0,0 +1,31 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingPublicationDate extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMissingPublicationDate(final String highlightValue, final float trust) {
+ super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().setPublicationdate(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java
new file mode 100644
index 000000000..a2ed5d043
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMissingSubject extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // MESHEUROPMC
+ // ARXIV
+ // JEL
+ // DDC
+ // ACM
+
+ return Arrays.asList();
+ }
+
+ private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) {
+ super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getSubjects().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java
new file mode 100644
index 000000000..4f1e88d3d
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java
@@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.Instance;
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMoreOpenAccess extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) {
+ super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getInstances().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue().getUrl();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java
new file mode 100644
index 000000000..ecf2cf310
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java
@@ -0,0 +1,32 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.broker.objects.Pid;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMorePid extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
+ return Arrays.asList();
+ }
+
+ private EnrichMorePid(final Pid highlightValue, final float trust) {
+ super("ENRICH/MORE/PID", highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getPids().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java
new file mode 100644
index 000000000..f29b86292
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class EnrichMoreSubject extends UpdateInfo {
+
+ public static List findUpdates(final Result source, final Result target) {
+ // MESHEUROPMC
+ // ARXIV
+ // JEL
+ // DDC
+ // ACM
+
+ return Arrays.asList();
+ }
+
+ private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) {
+ super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust);
+ }
+
+ @Override
+ public void compileHighlight(final OpenAireEventPayload payload) {
+ payload.getHighlight().getSubjects().add(getHighlightValue());
+ }
+
+ @Override
+ public String getHighlightValueAsString() {
+ return getHighlightValue();
+ }
+
+}
diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
new file mode 100644
index 000000000..f7b6b69e9
--- /dev/null
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import eu.dnetlib.broker.objects.OpenAireEventPayload;
+
+public abstract class UpdateInfo {
+
+ private final String topic;
+
+ private final T highlightValue;
+
+ private final float trust;
+
+ protected UpdateInfo(final String topic, final T highlightValue, final float trust) {
+ this.topic = topic;
+ this.highlightValue = highlightValue;
+ this.trust = trust;
+ }
+
+ public T getHighlightValue() {
+ return highlightValue;
+ }
+
+ public float getTrust() {
+ return trust;
+ }
+
+ public String getTopic() {
+ return topic;
+ }
+
+ abstract public void compileHighlight(OpenAireEventPayload payload);
+
+ abstract public String getHighlightValueAsString();
+
+}
diff --git a/dhp-workflows/dhp-bulktag/pom.xml b/dhp-workflows/dhp-bulktag/pom.xml
index 4844ef287..cc2396abf 100644
--- a/dhp-workflows/dhp-bulktag/pom.xml
+++ b/dhp-workflows/dhp-bulktag/pom.xml
@@ -5,7 +5,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.1.8-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
index 90a99926e..e62b4b4fc 100644
--- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@@ -25,6 +25,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class SparkBulkTagJob {
private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class);
+ public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@@ -108,12 +109,12 @@ public class SparkBulkTagJob {
.json(outputPath);
}
- private static Dataset readPath(
- SparkSession spark, String inputEntityPath, Class clazz) {
+ public static Dataset readPath(
+ SparkSession spark, String inputPath, Class clazz) {
return spark
.read()
- .json(inputEntityPath)
- .as(Encoders.bean(clazz));
+ .textFile(inputPath)
+ .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}
}
diff --git a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
index 2fea9ff41..524281bc9 100644
--- a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
@@ -1,39 +1,33 @@
-
-
- sourcePath
- the source path
-
-
- isLookUpUrl
- the isLookup service endpoint
-
-
- pathMap
- the json path associated to each selection field
-
-
- outputPath
- the output path
-
-
+
+
+ sourcePath
+ the source path
+
+
+ isLookUpUrl
+ the isLookup service endpoint
+
+
+ pathMap
+ the json path associated to each selection field
+
+
+ outputPath
+ the output path
+
+
-
+
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
-
+
-
-
-
-
-
-
-
-
+
+
@@ -67,6 +61,7 @@
+
${jobTracker}
@@ -91,14 +86,13 @@
+
+
+
+
+
+
-
-
-
-
-
-
-
${jobTracker}
@@ -127,6 +121,7 @@
+
${jobTracker}
@@ -155,6 +150,7 @@
+
${jobTracker}
@@ -183,6 +179,7 @@
+
${jobTracker}
@@ -211,6 +208,9 @@
-
-
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml
index e7f2a926f..2bf7b64d6 100644
--- a/dhp-workflows/dhp-dedup-openaire/pom.xml
+++ b/dhp-workflows/dhp-dedup-openaire/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
dhp-dedup-openaire
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
index a44650823..c0503d991 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java
@@ -137,10 +137,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
}
private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) {
+
+ String entityType = dedupConf.getWf().getEntityType();
+
Relation r = new Relation();
r.setSource(source);
r.setTarget(target);
r.setRelClass(relClass);
+ r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1));
r.setSubRelType("dedup");
DataInfo info = new DataInfo();
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
index 2d18c9a61..516808511 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java
@@ -86,7 +86,8 @@ public class SparkPropagateRelation extends AbstractSparkAction {
mergedIds,
FieldType.TARGET,
getFixRelFn(FieldType.TARGET))
- .filter(SparkPropagateRelation::containsDedup);
+ .filter(SparkPropagateRelation::containsDedup)
+ .distinct();
Dataset updated = processDataset(
processDataset(rels, mergedIds, FieldType.SOURCE, getDeletedFn()),
diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
index 990ac04c0..8dd00be97 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@@ -18,6 +18,7 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@@ -29,6 +30,8 @@ import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@@ -420,7 +423,7 @@ public class SparkDedupTest implements Serializable {
long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count();
- assertEquals(5022, relations);
+ assertEquals(4975, relations);
// check deletedbyinference
final Dataset mergeRels = spark
@@ -450,6 +453,25 @@ public class SparkDedupTest implements Serializable {
assertEquals(updated, deletedbyinference);
}
+ @Test
+ @Order(6)
+ public void testRelations() throws Exception {
+ testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10);
+ testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2);
+ }
+
+ private void testUniqueness(String path, int expected_total, int expected_unique) {
+ Dataset rel = spark
+ .read()
+ .textFile(getClass().getResource(path).getPath())
+ .map(
+ (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class),
+ Encoders.bean(Relation.class));
+
+ assertEquals(expected_total, rel.count());
+ assertEquals(expected_unique, rel.distinct().count());
+ }
+
@AfterAll
public static void finalCleanUp() throws IOException {
FileUtils.deleteDirectory(new File(testOutputBasePath));
diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json
new file mode 100644
index 000000000..c0cf8b695
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_1.json
@@ -0,0 +1,12 @@
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::40c7b1dfa18c3693d374dafd21ef852f","subRelType":"provision","target":"10|doajarticles::618df40624078491acfd93ca3ff6921c"}
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::0b4e756a73338f60b84de98d080f6422","subRelType":"provision","target":"10|doajarticles::6d01e689db13b6977b411f4170b6143b"}
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::fe2f7c9d350b9c5aa658ec384d761e33","subRelType":"provision","target":"10|doajarticles::9b8a956b0703854ba79e52ddf7dc552e"}
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|doajarticles::a116734108ba011ef715b012f095e3f5","subRelType":"provision","target":"10|doajarticles::c5de04b1a35da2cc4468e299bc9ffa16"}
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::8b83abbbcad5496fe43cda88d0045aa4","subRelType":"provision","target":"10|opendoar____::6855456e2fe46a9d49d3d3af4f57443d"}
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::88034de0247d9d36e22783e9319c5ba3","subRelType":"provision","target":"10|opendoar____::c17028c9b6e0c5deaad29665d582284a"}
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|opendoar____::dfb21c796f33e9acf505cc960a3d8d2c","subRelType":"provision","target":"10|opendoar____::dfa037a53e121ecc9e0926800c3e814e"}
+{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::b526b1aa1562038881a31be59896985f","subRelType":"provision","target":"10|re3data_____::2e457773b62df3534cc04441bf406a70"}
+{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::6b306183bc051b5aaa5376f2fab6e6e5","subRelType":"provision","target":"10|re3data_____::6371ff9ee1ec7073416cb83c868b10a3"}
+{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"}
+{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"}
+{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1588608946167,"relClass":"provides","relType":"datasourceOrganization","source":"20|re3data_____::0f697c2543a43bc0da793bf78ecd4996","subRelType":"provision","target":"10|re3data_____::770ef1f8eb03f174c0add746523c6f28"}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json
new file mode 100644
index 000000000..00db9715b
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/test/relation_2.json
@@ -0,0 +1,10 @@
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681628"}
+{"collectedfrom":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},"lastupdatetimestamp":null,"relClass":"isMergedIn","relType":"resultResult","source":"50|dedup_wf_001::498c4e6cfff198831b488a6c62221241","subRelType":"dedup","target":"50|doiboost____::8e5e14d80d0f2ebe6a6a55d972681629"}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml
index e87811cd5..395f39e8c 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml
+++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml
index d013dd1d9..572667949 100644
--- a/dhp-workflows/dhp-distcp/pom.xml
+++ b/dhp-workflows/dhp-distcp/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml
index d25446bbc..f7984528e 100644
--- a/dhp-workflows/dhp-graph-mapper/pom.xml
+++ b/dhp-workflows/dhp-graph-mapper/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
index 08a89cb22..891fee57e 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
@@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
@@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency(
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
- for (final Object o : doc.selectNodes("//dc:identifier")) {
- final String url = ((Node) o).getText().trim();
- if (url.startsWith("http")) {
- instance.setUrl(Arrays.asList(url));
- }
- }
+ List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
+ instance
+ .setUrl(
+ nodes
+ .stream()
+ .filter(n -> StringUtils.isNotBlank(n.getText()))
+ .map(n -> n.getText().trim())
+ .filter(u -> u.startsWith("http"))
+ .distinct()
+ .collect(Collectors.toCollection(ArrayList::new)));
+
return Lists.newArrayList(instance);
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
index 92a37c067..04984d008 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
@@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final KeyValue hostedby) {
final Instance instance = new Instance();
+ final Set url = new HashSet<>();
instance.setUrl(new ArrayList<>());
instance
.setInstancetype(
@@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
- instance.getUrl().add(((Node) o).getText().trim());
+ url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
- instance.getUrl().add(((Node) o).getText().trim());
+ url.add(((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
- instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
+ url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
- instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
+ url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
+ instance.getUrl().addAll(url);
return Arrays.asList(instance);
}
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml
index 03604f431..c202c66f7 100644
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml
index baac163d2..9b3aa2fc4 100644
--- a/dhp-workflows/dhp-graph-provision/pom.xml
+++ b/dhp-workflows/dhp-graph-provision/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
index 923f6de69..298ac7589 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
@@ -405,6 +405,9 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.speculation=false
+ --conf spark.hadoop.mapreduce.map.speculative=false
+ --conf spark.hadoop.mapreduce.reduce.speculative=false
--inputPath${workingDir}/xml
--isLookupUrl ${isLookupUrl}
diff --git a/dhp-workflows/dhp-propagation/pom.xml b/dhp-workflows/dhp-propagation/pom.xml
index 7d822b9d2..67fdbd9cf 100644
--- a/dhp-workflows/dhp-propagation/pom.xml
+++ b/dhp-workflows/dhp-propagation/pom.xml
@@ -5,7 +5,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.1.8-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
index 90b004883..f269c5442 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
@@ -27,14 +27,8 @@
-
-
-
-
-
-
-
-
+
+
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
index 243167bd6..7b06b6504 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml
@@ -1,37 +1,29 @@
-
-
- sourcePath
- the source path
-
-
- allowedsemrels
- the semantic relationships allowed for propagation
-
-
- outputPath
- the output path
-
-
-
+
+
+ sourcePath
+ the source path
+
+
+ allowedsemrels
+ the semantic relationships allowed for propagation
+
+
+ outputPath
+ the output path
+
+
+
-
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
+
-
-
-
-
-
-
-
-
-
+
+
@@ -119,11 +111,16 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${workingDir}/preparedInfo/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
+ --sourcePath
+ ${sourcePath}
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath
+ ${workingDir}/preparedInfo/targetOrcidAssoc
+ --allowedsemrels
+ ${allowedsemrels}
@@ -147,11 +144,16 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${workingDir}/preparedInfo/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
+ --sourcePath
+ ${sourcePath}
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath
+ ${workingDir}/preparedInfo/targetOrcidAssoc
+ --allowedsemrels
+ ${allowedsemrels}
@@ -175,11 +177,16 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${workingDir}/preparedInfo/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
+ --sourcePath
+ ${sourcePath}
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath
+ ${workingDir}/preparedInfo/targetOrcidAssoc
+ --allowedsemrels
+ ${allowedsemrels}
@@ -203,11 +210,16 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --sourcePath${sourcePath}
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${workingDir}/preparedInfo/targetOrcidAssoc
- --allowedsemrels${allowedsemrels}
+ --sourcePath
+ ${sourcePath}
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Software
+ --outputPath
+ ${workingDir}/preparedInfo/targetOrcidAssoc
+ --allowedsemrels
+ ${allowedsemrels}
@@ -233,11 +245,13 @@
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- --sourcePath${workingDir}/preparedInfo/targetOrcidAssoc
- --outputPath${workingDir}/preparedInfo/mergedOrcidAssoc
+ --sourcePath
+ ${workingDir}/preparedInfo/targetOrcidAssoc
+ --outputPath
+ ${workingDir}/preparedInfo/mergedOrcidAssoc
-
+
@@ -270,12 +284,18 @@
--conf spark.hadoop.mapreduce.reduce.speculative=false
--conf spark.sql.shuffle.partitions=3840
- --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc
- --sourcePath${sourcePath}/publication
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${outputPath}/publication
- --saveGraph${saveGraph}
+ --possibleUpdatesPath
+ ${workingDir}/preparedInfo/mergedOrcidAssoc
+ --sourcePath
+ ${sourcePath}/publication
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath
+ ${outputPath}/publication
+ --saveGraph
+ ${saveGraph}
@@ -301,12 +321,18 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
- --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc
- --sourcePath${sourcePath}/dataset
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${outputPath}/dataset
- --saveGraph${saveGraph}
+ --possibleUpdatesPath
+ ${workingDir}/preparedInfo/mergedOrcidAssoc
+ --sourcePath
+ ${sourcePath}/dataset
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath
+ ${outputPath}/dataset
+ --saveGraph
+ ${saveGraph}
@@ -332,12 +358,18 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
- --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc
- --sourcePath${sourcePath}/otherresearchproduct
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${outputPath}/otherresearchproduct
- --saveGraph${saveGraph}
+ --possibleUpdatesPath
+ ${workingDir}/preparedInfo/mergedOrcidAssoc
+ --sourcePath
+ ${sourcePath}/otherresearchproduct
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath
+ ${outputPath}/otherresearchproduct
+ --saveGraph
+ ${saveGraph}
@@ -363,16 +395,22 @@
--conf spark.hadoop.mapreduce.map.speculative=false
--conf spark.hadoop.mapreduce.reduce.speculative=false
- --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc
- --sourcePath${sourcePath}/software
- --hive_metastore_uris${hive_metastore_uris}
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${outputPath}/software
- --saveGraph${saveGraph}
+ --possibleUpdatesPath
+ ${workingDir}/preparedInfo/mergedOrcidAssoc
+ --sourcePath
+ ${sourcePath}/software
+ --hive_metastore_uris
+ ${hive_metastore_uris}
+ --resultTableName
+ eu.dnetlib.dhp.schema.oaf.Software
+ --outputPath
+ ${outputPath}/software
+ --saveGraph
+ ${saveGraph}
-
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
index 850a2f498..dd7f25846 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml
@@ -14,27 +14,21 @@
-
-
+
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
-
-
-
-
-
-
-
-
-
+
+
+
@@ -190,4 +184,5 @@
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
index e041fc39c..3be69bde6 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml
@@ -14,23 +14,16 @@
-
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
-
-
-
-
-
-
-
-
-
+
+
@@ -64,6 +57,7 @@
+
${jobTracker}
@@ -85,6 +79,7 @@
+
@@ -111,7 +106,6 @@
--organizationtoresultcommunitymap${organizationtoresultcommunitymap}
-
@@ -150,6 +144,7 @@
+
yarn
@@ -178,6 +173,7 @@
+
yarn
@@ -206,6 +202,7 @@
+
yarn
@@ -238,4 +235,5 @@
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
index f2d406ad9..b75b2d31e 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
@@ -1,43 +1,38 @@
-
-
- sourcePath
- the source path
-
-
- allowedsemrels
- the semantic relationships allowed for propagation
-
-
- isLookUpUrl
- the isLookup service endpoint
-
-
- outputPath
- the output path
-
-
+
+
+ sourcePath
+ the source path
+
+
+ allowedsemrels
+ the semantic relationships allowed for propagation
+
+
+ isLookUpUrl
+ the isLookup service endpoint
+
+
+ outputPath
+ the output path
+
+
-
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
+
-
-
-
-
-
-
-
-
-
+
+
+
@@ -126,6 +121,7 @@
+
yarn
@@ -182,6 +178,7 @@
+
yarn
@@ -214,29 +211,29 @@
-
- yarn
- cluster
- ResultToCommunityEmRelPropagation-PreparePhase2
- eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2
- dhp-propagation-${projectVersion}.jar
-
- --executor-cores=${sparkExecutorCores}
- --executor-memory=${sparkExecutorMemory}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.dynamicAllocation.enabled=true
- --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
-
- --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc
- --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc
-
-
-
-
+
+ yarn
+ cluster
+ ResultToCommunityEmRelPropagation-PreparePhase2
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc
+ --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc
+
+
+
+
@@ -275,6 +272,7 @@
+
yarn
@@ -303,6 +301,7 @@
+
yarn
@@ -331,6 +330,7 @@
+
yarn
@@ -362,5 +362,6 @@
-
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
index 6bf7d0cec..73268fcc7 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml
@@ -1,32 +1,25 @@
-
-
- sourcePath
- the source path
-
-
- outputPath
- sets the outputPath
-
-
+
+
+ sourcePath
+ the source path
+
+
+ outputPath
+ sets the outputPath
+
+
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
-
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
-
-
-
-
-
-
-
-
-
+
+
@@ -53,6 +46,7 @@
+
${jobTracker}
@@ -85,6 +79,7 @@
+
${jobTracker}
@@ -95,6 +90,7 @@
+
${jobTracker}
@@ -151,9 +147,9 @@
--alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked
-
+
@@ -220,6 +216,7 @@
+
yarn
@@ -281,6 +278,7 @@
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml
index 0f5e18082..6a3fa8f1d 100644
--- a/dhp-workflows/dhp-stats-update/pom.xml
+++ b/dhp-workflows/dhp-stats-update/pom.xml
@@ -3,7 +3,7 @@
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
dhp-stats-update
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql
index d4ca2e10e..461f48bfc 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9_6.sql
@@ -1,2 +1,2 @@
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages;
-CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) as id, langs.languages as language from openaire.datasource d LATERAL VIEW explode(d.odlanguages.value) langs as languages;
+CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) as id, langs.languages as language from ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs as languages;
diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml
index bad72a9ef..c51b6a502 100644
--- a/dhp-workflows/dhp-worfklow-profiles/pom.xml
+++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml
@@ -1,11 +1,9 @@
-
+
dhp-workflows
eu.dnetlib.dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
4.0.0
diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml
index e59ad2e79..f2ece234d 100644
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
../
diff --git a/pom.xml b/pom.xml
index 483873219..3c5263bb1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
eu.dnetlib.dhp
dhp
- 1.1.7-SNAPSHOT
+ 1.2.0-SNAPSHOT
pom