Fixed the mapping from OAF to Scholix summary

This commit is contained in:
Sandro La Bruzzo 2021-07-02 16:48:48 +02:00
parent 8fa0841898
commit e4b84ef5d6
2 changed files with 29 additions and 7 deletions

View File

@@ -35,7 +35,7 @@ object SparkCreateSummaryObject {
val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result]
ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).write.mode(SaveMode.Overwrite).save(targetPath)
ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath)
}

View File

@@ -1,9 +1,10 @@
package eu.dnetlib.dhp.sx.graph.scholix
import eu.dnetlib.dhp.schema.oaf.{Dataset, Result}
import eu.dnetlib.dhp.schema.sx.summary.{SchemeValue, ScholixSummary, TypedIdentifier, Typology}
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, TypedIdentifier, Typology}
import scala.collection.JavaConverters._
import scala.language.postfixOps
object ScholixUtils {
@@ -11,7 +12,13 @@ object ScholixUtils {
def resultToSummary(r:Result):ScholixSummary = {
val s = new ScholixSummary
s.setId(r.getId)
s.setLocalIdentifier(r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid)).asJava)
if (r.getPid == null || r.getPid.isEmpty)
return null
val pids:List[TypedIdentifier] = r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid))(collection breakOut)
s.setLocalIdentifier(pids.asJava)
s.getLocalIdentifier.isEmpty
if (r.isInstanceOf[Dataset])
s.setTypology(Typology.dataset)
@@ -21,11 +28,17 @@ object ScholixUtils {
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) {
s.setTitle(r.getTitle.asScala.map(t => t.getValue).asJava)
val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut)
if (titles.nonEmpty)
s.setTitle(titles.asJava)
else
return null
}
if(r.getAuthor!= null && !r.getAuthor.isEmpty) {
s.setAuthor(r.getAuthor.asScala.map(a=> a.getFullname).asJava)
val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut)
if (authors nonEmpty)
s.setAuthor(authors.asJava)
}
if (r.getInstance() != null) {
val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut)
@@ -38,12 +51,21 @@ object ScholixUtils {
s.setDescription(d.get.getValue)
}
if (r.getSubject!= null && !r.getSubject.isEmpty)
s.setSubject(r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).asJava)
if (r.getSubject!= null && !r.getSubject.isEmpty) {
val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut)
if (subjects.nonEmpty)
s.setSubject(subjects.asJava)
}
if (r.getPublisher!= null)
s.setPublisher(List(r.getPublisher.getValue).asJava)
if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) {
val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut)
if (cf.nonEmpty)
s.setDatasources(cf.asJava)
}
s.setRelatedDatasets(0)
s.setRelatedPublications(0)
s.setRelatedUnknown(0)