2023-11-24 12:39:58 +01:00
|
|
|
package eu.dnetlib.dhp.enrich.orcid
|
|
|
|
|
2023-11-30 14:36:50 +01:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2023-11-24 12:39:58 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
|
|
|
|
import eu.dnetlib.dhp.schema.sx.OafUtils
|
|
|
|
import org.apache.spark.sql.Row
|
|
|
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
|
|
|
/** Helpers that build OAF [[Author]] instances tagged with an ORCID pid. */
object AuthorEnricher extends Serializable {

  /** Creates an [[Author]] with the given names and a single ORCID pid.
    *
    * The pid is created through `OafUtils.createSP` with `ModelConstants.ORCID` and
    * its data info is tagged with the `ORCID_ENRICHMENT` provenance action.
    *
    * @param givenName  value stored as the author's name; may be null
    * @param familyName value stored as the author's surname; may be null
    * @param orcid      identifier stored as the value of the author's only pid
    * @return the newly created author
    */
  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
    val a = new Author
    a.setName(givenName)
    a.setSurname(familyName)

    // Join only the parts that are actually present: interpolating a null reference
    // would otherwise put the literal text "null" into the full name.
    val fullname = Seq(givenName, familyName)
      .filter(part => part != null && part.nonEmpty)
      .mkString(" ")
    a.setFullname(fullname)

    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
    a.setPid(List(pid).asJava)
    a
  }

  /** Converts the list of rows stored at index 1 of `r` into OAF authors.
    *
    * Each nested row is read through its `givenName`, `familyName` and `orcid`
    * string fields and passed to [[createAuthor]].
    *
    * @param r row whose field at index 1 is a list of author rows
    * @return one [[Author]] per nested row, in the same order; an empty list when
    *         the list at index 1 is null
    */
  def toOAFAuthor(r: Row): java.util.List[Author] = {
    // Guard against a null list so that a missing value yields no authors
    // instead of a NullPointerException.
    Option(r.getList[Row](1))
      .map(_.asScala.toList)
      .getOrElse(List.empty[Row])
      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
      .asJava
  }

  // NOTE(review): disabled draft kept from the original file; it refers to an
  // `OAuthor` type that is not visible here. Consider deleting it.
  //  def enrichAuthor(p:Publication,r:Row): Unit = {
  //    val k:Map[String, OAuthor] =r.getList[Row](1).asScala.map(s => (s.getAs[String]("orcid"), OAuthor(s.getAs[String]("givenName") ,s.getAs[String]("familyName") ))).groupBy(_._1).mapValues(_.map(_._2).head)
  //    println(k)
  //
  //
  //
  //  }

}
|