From bd3b16402b4a21dc2862e05aad83752533f39494 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 1 Mar 2021 10:16:02 +0100 Subject: [PATCH] added result typologies --- .../orcidnodoi/SparkGenEnrichedOrcidWorks.java | 8 ++++++-- .../doiboost/orcidnodoi/oaf/PublicationToOaf.java | 13 +++++++++++++ .../doiboost/orcidnodoi/mappings/typologies.json | 10 +++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 933162f280..cda08939ca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.Objects; import java.util.Optional; -import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; @@ -38,6 +37,7 @@ import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.json.JsonHelper; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; @@ -137,6 +137,8 @@ public class SparkGenEnrichedOrcidWorks { .sparkContext() .longAccumulator("errorsNotFoundAuthors"); final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType"); + final LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound"); + final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, @@ -144,7 +146,8 @@ public class SparkGenEnrichedOrcidWorks { errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, - dateOfCollection); + otherTypeFound, + dateOfCollection); JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { @@ -173,6 +176,7 @@ public class SparkGenEnrichedOrcidWorks { logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); + logger.info("otherTypeFound: " + otherTypeFound.value().toString()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index f786015063..777f3fa462 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -43,6 +43,7 @@ public class PublicationToOaf implements Serializable { private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; + private final LongAccumulator otherTypeFound; public PublicationToOaf( LongAccumulator parsedPublications, @@ -51,6 +52,7 @@ public class PublicationToOaf implements Serializable { LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, + LongAccumulator otherTypeFound, String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; @@ -58,6 +60,7 @@ public class PublicationToOaf implements Serializable { this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; + this.otherTypeFound = otherTypeFound; this.dateOfCollection = dateOfCollection; } @@ -68,6 +71,8 @@ public class PublicationToOaf implements Serializable { this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; + this.otherTypeFound = null; + this.dateOfCollection = null; } private static Map> datasources = new HashMap>() { @@ -221,6 +226,14 @@ public class PublicationToOaf implements Serializable { final String typeValue = typologiesMapping.get(type).get("value"); cobjValue = typologiesMapping.get(type).get("cobj"); + // this dataset must contain only publication + if (cobjValue.equals("0020")) { + if (otherTypeFound != null) { + otherTypeFound.add(1); + } + return null; + } + final Instance instance = new Instance(); // Adding hostedby diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json index 001266479b..84b4f84181 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json @@ -31,5 +31,13 @@ "annotation": {"cobj":"0018", "value": "Annotation"}, "physical-object": {"cobj":"0028", "value": "PhysicalObject"}, "preprint": {"cobj":"0016", "value": "Preprint"}, - "software": {"cobj":"0029", "value": "Software"} + "software": {"cobj":"0029", "value": "Software"}, + "journal-issue": {"cobj":"0001", "value": "Article"}, + "translation": {"cobj":"0038", "value": "Other literature type"}, + "artistic-performance": {"cobj":"0020", "value": "Other ORP type"}, + "online-resource": {"cobj":"0020", "value": "Other ORP type"}, + "registered-copyright": {"cobj":"0020", "value": "Other ORP type"}, + "trademark": {"cobj":"0020", "value": "Other ORP type"}, + "invention": {"cobj":"0020", "value": "Other ORP type"}, + "spin-off-company": {"cobj":"0020", "value": "Other ORP type"} } \ No newline at end of file