From 098914dcff4fa9a01d229ff610a656853c54602a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Mar 2021 11:35:02 +0100 Subject: [PATCH] fix wrong relation with source null --- .../SparkGenerateDOIBoostActionSet.scala | 16 +++++++++------- .../dnetlib/doiboost/crossref/Crossref2Oaf.scala | 13 +++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala index 78477ae4d..21d3454da 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala @@ -38,37 +38,39 @@ object SparkGenerateDOIBoostActionSet { val crossRefRelation = parser.get("crossRefRelation") val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath") val dbOrganizationPath = parser.get("dbOrganizationPath") - val workingDirPath = parser.get("targetPath") val sequenceFilePath = parser.get("sFilePath") val asDataset = spark.read.load(dbDatasetPath).as[OafDataset] + .filter(p => p != null && p.getId != null) /* keep only non-null records that carry an id; && stops a null record from being dereferenced */ .map(d =>DoiBoostMappingUtil.fixResult(d)) .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) -// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/actionSet") + val asPublication =spark.read.load(dbPublicationPath).as[Publication] + .filter(p => p != null && p.getId != null) /* keep only non-null records that carry an id */ .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) -// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet") + val asOrganization = spark.read.load(dbOrganizationPath).as[Organization] .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) -// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet") +
val asCRelation = spark.read.load(crossRefRelation).as[Relation] + .filter(r => r != null && r.getSource != null && r.getTarget != null) /* keep only non-null relations that have both a source and a target */ .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) -// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet") + val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation] .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) -// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet") + val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation) -// spark.read.load(s"$workingDirPath/actionSet").as[(String,String)] + d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 43b3f7e1c..b051177f5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -15,7 +15,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex -import eu.dnetlib.dhp.schema.scholexplorer.OafUtils; +import eu.dnetlib.dhp.schema.scholexplorer.OafUtils case class CrossrefDT(doi: String, json:String, timestamp: Long) {} @@ -182,7 +182,7 @@ case object Crossref2Oaf { // Ticket #6281 added pid to Instance instance.setPid(result.getPid) - val has_review = (json \ "relation" \"has-review" \ "id") + val has_review = json \ "relation" \"has-review" \ "id" if(has_review != JNothing) { instance.setRefereed( @@ -208,8 +208,9 @@ case object Crossref2Oaf { 
instance.setUrl(links.asJava) result.setId(IdentifierFactory.createDOIBoostIdentifier(result)) if (result.getId== null) - return null - result + null + else + result } @@ -241,9 +242,9 @@ case object Crossref2Oaf { val result = generateItemFromType(objectType, objectSubType) if (result == null) return List() - val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")); + val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")) mappingResult(result, json, cOBJCategory) - if (result == null) + if (result == null || result.getId == null) return List()