diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index e4d458780..6b5bed5b8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; -import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; @@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType; -import eu.dnetlib.dhp.actionmanager.ror.model.Relationship; import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; @@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -168,38 +165,10 @@ public class GenerateRorActionSetJob { final List> res = new ArrayList<>(); res.add(new AtomicAction<>(Organization.class, o)); - for (final Relationship rorRel : r.getRelationships()) { - if (rorRel.getType().equalsIgnoreCase("parent")) { - final String orgId1 = calculateOpenaireId(r.getId()); - final String orgId2 = calculateOpenaireId(rorRel.getId()); - res - .add( - new AtomicAction<>(Relation.class, - calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF))); - res - .add( - new AtomicAction<>(Relation.class, - calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF))); - } - } - return res; } - private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) { - final Relation rel = new Relation(); - rel.setSource(source); - rel.setTarget(target); - rel.setRelType(ORG_ORG_RELTYPE); - rel.setSubRelType(ModelConstants.RELATIONSHIP); - rel.setRelClass(relClass); - rel.setCollectedfrom(ROR_COLLECTED_FROM); - rel.setDataInfo(ROR_DATA_INFO); - rel.setLastupdatetimestamp(System.currentTimeMillis()); - return rel; - } - private static String calculateOpenaireId(final String rorId) { return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index 947a9a255..ecae6811a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -15,7 +15,7 @@ "official_name": "Aperta TÜBİTAK Open Archive" }, "BL.CAM": { - "openaire_id": "re3data_____::r3d100010620", + "openaire_id": "opendoar____::109", "datacite_name": "Apollo", "official_name": "Apollo" }, @@ -196,7 +196,7 @@ }, "CSIC.DIGITAL": { "openaire_id": "re3data_____::r3d100011076", - "datacite_name": "DIGITAL.CSIC", + "datacite_name": "Digital CSIC", "official_name": "DIGITAL.CSIC" }, "BL.DRI": { @@ -644,6 +644,11 @@ "datacite_name": "PANGAEA", "official_name": "PANGAEA" }, + "TIB.PANGAEA": { + "openaire_id": "re3data_____::r3d100010134", + "datacite_name": "PANGAEA", + "official_name": "PANGAEA" + }, "NASAPDS.NASAPDS": { "openaire_id": "re3data_____::r3d100010121", "datacite_name": "PDS", @@ -896,7 +901,7 @@ }, "FIGSHARE.UCT": { "openaire_id": "re3data_____::r3d100012633", - "datacite_name": "ZivaHub", + "datacite_name": "University of Cape Town (UCT)", "official_name": "ZivaHub" }, "BL.UCLAN": { @@ -1030,9 +1035,9 @@ "official_name": "ZBW Journal Data Archive" }, "CERN.ZENODO": { - "openaire_id": "re3data_____::r3d100010468", + "openaire_id": "opendoar____::2659", "datacite_name": "Zenodo", - "official_name": "Zenodo" + "official_name": "ZENODO" }, "ZBW.ZEW": { "openaire_id": "re3data_____::r3d100010399", diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 79b9e8183..72b93749b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -60,7 +60,7 @@ object SparkGenerateDoiBoost { val openaireOrganizationPath = parser.get("openaireOrganizationPath") val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { - override def zero: Publication = new Publication + override def zero: Publication = null override def reduce(b: Publication, a: (String, Publication)): Publication = { @@ -177,8 +177,33 @@ object SparkGenerateDoiBoost { .map(DoiBoostMappingUtil.fixPublication) .map(p => (p.getId, p)) .groupByKey(_._1) - .agg(crossrefAggregator.toColumn) - .map(p => p._2) + .reduceGroups((left, right) => { + //Check left is not null + if (left != null && left._1 != null) { + //If right is null then return left + if (right == null || right._2 == null) + left + else { + // Here Left and Right are not null + // So we have to merge + val b1 = left._2 + val b2 = right._2 + b1.mergeFrom(b2) + b1.mergeOAFDataInfo(b2) + val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) + b1.setAuthor(authors) + if (b2.getId != null && b2.getId.nonEmpty) + b1.setId(b2.getId) + //Return publication Merged + (b1.getId, b1) + } + } else { + // Left is Null so we return right + right + } + }) + .filter(s => s != null && s._2 != null) + .map(s => s._2._2) .write .mode(SaveMode.Overwrite) .save(s"$workingDirPath/doiBoostPublicationFiltered") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index b5801ca5c..3e23e3493 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -711,10 +711,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Relation r = new Relation(); r.setRelType(ORG_ORG_RELTYPE); r.setSubRelType(ModelConstants.RELATIONSHIP); - r - .setRelClass( - rs.getString("type").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF - : ModelConstants.IS_CHILD_OF); + r.setRelClass(rs.getString("type")); r.setSource(orgId1); r.setTarget(orgId2); r.setCollectedfrom(collectedFrom); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml index f77b46105..888a873c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -283,7 +283,15 @@ - + + + + + + + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql index 388fee3f5..0ac843401 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql @@ -10,4 +10,4 @@ SELECT 'OpenOrgs Database' AS collectedfromname, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction FROM relationships -WHERE reltype = 'Child' OR reltype = 'Parent' \ No newline at end of file +WHERE reltype = 'IsChildOf' OR reltype = 'IsParentOf' \ No newline at end of file