diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index f37abfa2a..a66da3e6d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -35,7 +35,7 @@ object SparkCreateSummaryObject { val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result] - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index ba1ebdb64..c0e28ee36 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.sx.graph.scholix import eu.dnetlib.dhp.schema.oaf.{Dataset, Result} -import eu.dnetlib.dhp.schema.sx.summary.{SchemeValue, ScholixSummary, TypedIdentifier, Typology} +import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, TypedIdentifier, Typology} import scala.collection.JavaConverters._ +import scala.language.postfixOps object ScholixUtils { @@ -11,7 +12,13 @@ object ScholixUtils { def resultToSummary(r:Result):ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) - s.setLocalIdentifier(r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid)).asJava) + if (r.getPid == null || r.getPid.isEmpty) + return null + + val pids:List[TypedIdentifier] = r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid))(collection breakOut) + s.setLocalIdentifier(pids.asJava) + + s.getLocalIdentifier.isEmpty if (r.isInstanceOf[Dataset]) s.setTypology(Typology.dataset) @@ -21,11 +28,17 @@ object ScholixUtils { s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { - s.setTitle(r.getTitle.asScala.map(t => t.getValue).asJava) + val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (titles.nonEmpty) + s.setTitle(titles.asJava) + else + return null } if(r.getAuthor!= null && !r.getAuthor.isEmpty) { - s.setAuthor(r.getAuthor.asScala.map(a=> a.getFullname).asJava) + val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) + if (authors nonEmpty) + s.setAuthor(authors.asJava) } if (r.getInstance() != null) { val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) @@ -38,12 +51,21 @@ object ScholixUtils { s.setDescription(d.get.getValue) } - if (r.getSubject!= null && !r.getSubject.isEmpty) - s.setSubject(r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).asJava) + if (r.getSubject!= null && !r.getSubject.isEmpty) { + val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (subjects.nonEmpty) + s.setSubject(subjects.asJava) + } if (r.getPublisher!= null) s.setPublisher(List(r.getPublisher.getValue).asJava) + if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { + val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (cf.nonEmpty) + s.setDatasources(cf.asJava) + } + s.setRelatedDatasets(0) s.setRelatedPublications(0) s.setRelatedUnknown(0)