dnet-hadoop/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala

152 lines
4.4 KiB
Scala

package eu.dnetlib.doiboost.orcid
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI}
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
import org.apache.commons.lang.StringUtils
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods._
case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){}
case class OrcidAuthor(oid:String, name:Option[String], surname:Option[String], creditName:Option[String], otherNames:Option[List[String]], errorCode:Option[String]){}
case class OrcidWork(oid:String, doi:String)
case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {}
object ORCIDToOAF {
val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
val mapper = new ObjectMapper()
def isJsonValid(inputStr: String): Boolean = {
import java.io.IOException
try {
mapper.readTree(inputStr)
true
} catch {
case e: IOException =>
false
}
}
def extractValueFromInputString(input: String): (String, String) = {
val i = input.indexOf('[')
if (i <5) {
return null
}
val orcidList = input.substring(i, input.length - 1)
val doi = input.substring(1, i - 1)
if (isJsonValid(orcidList)) {
(doi, orcidList)
} else null
}
def strValid(s:Option[String]) : Boolean = {
s.isDefined && s.get.nonEmpty
}
def authorValid(author:OrcidAuthor): Boolean ={
if (strValid(author.name) && strValid(author.surname)) {
return true
}
if (strValid(author.surname)) {
return true
}
if (strValid(author.creditName)) {
return true
}
false
}
def extractDOIWorks(input:String): List[OrcidWork] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
val oid = (json \ "workDetail" \"oid").extractOrElse[String](null)
if (oid == null)
return List()
val doi:List[(String, String)] = for {
JObject(extIds) <- json \ "workDetail" \"extIds"
JField("type", JString(typeValue)) <- extIds
JField("value", JString(value)) <- extIds
if "doi".equalsIgnoreCase(typeValue)
} yield (typeValue, value)
if (doi.nonEmpty) {
return doi.map(l =>OrcidWork(oid, l._2))
}
List()
}
def convertORCIDAuthor(input:String): OrcidAuthor = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
(json \"authorData" ).extractOrElse[OrcidAuthor](null)
}
def convertTOOAF(input:ORCIDItem) :Publication = {
val doi = input.doi
val pub:Publication = new Publication
pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
pub.setDataInfo(generateDataInfo())
pub.setId(DoiBoostMappingUtil.generateIdentifier(pub, doi.toLowerCase))
try{
val l:List[Author]= input.authors.map(a=> {
generateAuthor(a)
})(collection.breakOut)
pub.setAuthor(l.asJava)
pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava)
pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
pub
} catch {
case e: Throwable =>
logger.info(s"ERROR ON GENERATE Publication from $input")
null
}
}
def generateOricPIDDatainfo():DataInfo = {
val di =DoiBoostMappingUtil.generateDataInfo("0.91")
di.getProvenanceaction.setClassid("sysimport:crosswalk:entityregistry")
di.getProvenanceaction.setClassname("Harvested")
di
}
def generateAuthor(o : OrcidAuthor): Author = {
val a = new Author
if (strValid(o.name)) {
a.setName(o.name.get.capitalize)
}
if (strValid(o.surname)) {
a.setSurname(o.surname.get.capitalize)
}
if(strValid(o.name) && strValid(o.surname))
a.setFullname(s"${o.name.get.capitalize} ${o.surname.get.capitalize}")
else if (strValid(o.creditName))
a.setFullname(o.creditName.get)
if (StringUtils.isNotBlank(o.oid))
a.setPid(List(createSP(o.oid, ModelConstants.ORCID, ModelConstants.DNET_PID_TYPES, generateOricPIDDatainfo())).asJava)
a
}
}