fixed mapping OAF to Scholix summary

This commit is contained in:
Sandro La Bruzzo 2021-07-02 16:48:48 +02:00
parent 8fa0841898
commit e4b84ef5d6
2 changed files with 29 additions and 7 deletions

View File

@@ -35,7 +35,7 @@ object SparkCreateSummaryObject {
val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result] val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result]
ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).write.mode(SaveMode.Overwrite).save(targetPath) ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath)
} }

View File

@@ -1,9 +1,10 @@
package eu.dnetlib.dhp.sx.graph.scholix package eu.dnetlib.dhp.sx.graph.scholix
import eu.dnetlib.dhp.schema.oaf.{Dataset, Result} import eu.dnetlib.dhp.schema.oaf.{Dataset, Result}
import eu.dnetlib.dhp.schema.sx.summary.{SchemeValue, ScholixSummary, TypedIdentifier, Typology} import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, TypedIdentifier, Typology}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import scala.language.postfixOps
object ScholixUtils { object ScholixUtils {
@@ -11,7 +12,13 @@ object ScholixUtils {
def resultToSummary(r:Result):ScholixSummary = { def resultToSummary(r:Result):ScholixSummary = {
val s = new ScholixSummary val s = new ScholixSummary
s.setId(r.getId) s.setId(r.getId)
s.setLocalIdentifier(r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid)).asJava) if (r.getPid == null || r.getPid.isEmpty)
return null
val pids:List[TypedIdentifier] = r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid))(collection breakOut)
s.setLocalIdentifier(pids.asJava)
s.getLocalIdentifier.isEmpty
if (r.isInstanceOf[Dataset]) if (r.isInstanceOf[Dataset])
s.setTypology(Typology.dataset) s.setTypology(Typology.dataset)
@@ -21,11 +28,17 @@ object ScholixUtils {
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) {
s.setTitle(r.getTitle.asScala.map(t => t.getValue).asJava) val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut)
if (titles.nonEmpty)
s.setTitle(titles.asJava)
else
return null
} }
if(r.getAuthor!= null && !r.getAuthor.isEmpty) { if(r.getAuthor!= null && !r.getAuthor.isEmpty) {
s.setAuthor(r.getAuthor.asScala.map(a=> a.getFullname).asJava) val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut)
if (authors nonEmpty)
s.setAuthor(authors.asJava)
} }
if (r.getInstance() != null) { if (r.getInstance() != null) {
val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut)
@@ -38,12 +51,21 @@ object ScholixUtils {
s.setDescription(d.get.getValue) s.setDescription(d.get.getValue)
} }
if (r.getSubject!= null && !r.getSubject.isEmpty) if (r.getSubject!= null && !r.getSubject.isEmpty) {
s.setSubject(r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).asJava) val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut)
if (subjects.nonEmpty)
s.setSubject(subjects.asJava)
}
if (r.getPublisher!= null) if (r.getPublisher!= null)
s.setPublisher(List(r.getPublisher.getValue).asJava) s.setPublisher(List(r.getPublisher.getValue).asJava)
if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) {
val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut)
if (cf.nonEmpty)
s.setDatasources(cf.asJava)
}
s.setRelatedDatasets(0) s.setRelatedDatasets(0)
s.setRelatedPublications(0) s.setRelatedPublications(0)
s.setRelatedUnknown(0) s.setRelatedUnknown(0)