From 0377b40fbad56c0dd75fb7c8287488a4f63ceffe Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 30 Jul 2020 18:38:07 +0200 Subject: [PATCH] output to one parquet file --- .../doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 +- .../doiboost/orcidnodoi/oaf/PublicationToOaf.java | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index cae5a168f7..dea597cbb9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -123,7 +123,7 @@ public class SparkGenEnrichedOrcidWorks { Dataset publicationDataset = spark .createDataset( - oafPublicationRDD.rdd(), + oafPublicationRDD.repartition(1).rdd(), Encoders.bean(Publication.class)); publicationDataset .write() diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 448fa9a744..503df67ff0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -217,6 +218,8 @@ public class PublicationToOaf implements Serializable { final List urls = createRepeatedField(rootElement, "urls"); if (urls != null && !urls.isEmpty()) { instance.setUrl(urls); + } else { + dataInfo.setInvisible(true); } final String pubDate = getPublicationDate(rootElement, "publicationDates"); @@ -508,8 +511,10 @@ public class PublicationToOaf implements Serializable { final StructuredProperty sp = new StructuredProperty(); sp.setValue(orcidId); final Qualifier q = new Qualifier(); - q.setClassid("ORCID"); - q.setClassname("ORCID"); + q.setClassid(ORCID.toLowerCase()); + q.setClassname(ORCID.toLowerCase()); + q.setSchemeid(ModelConstants.DNET_PID_TYPES); + q.setSchemename(ModelConstants.DNET_PID_TYPES); sp.setQualifier(q); return sp; }