2020-05-22 15:15:09 +02:00
|
|
|
package eu.dnetlib.doiboost.orcid
|
|
|
|
|
2020-12-23 16:59:52 +01:00
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
2021-03-31 18:33:57 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2021-03-17 15:53:24 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
2020-12-10 16:14:16 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
|
|
|
|
import eu.dnetlib.dhp.schema.orcid.OrcidDOI
|
2020-05-22 15:15:09 +02:00
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
2021-03-31 18:33:57 +02:00
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
|
2020-05-22 15:15:09 +02:00
|
|
|
import org.apache.commons.lang.StringUtils
|
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Immutable record made of five string fields describing one ORCID entry.
 *
 * NOTE(review): the field meanings below are inferred from their names only — confirm
 * against the producer of these records.
 *
 * @param oid        presumably the ORCID iD
 * @param name       presumably the given name
 * @param surname    presumably the family name
 * @param creditName presumably the display ("credit") name
 * @param errorCode  presumably an error marker set when the record could not be resolved
 */
case class ORCIDItem(
  oid: String,
  name: String,
  surname: String,
  creditName: String,
  errorCode: String
)
|
2020-05-29 09:32:04 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Immutable pairing of a DOI string with a list of [[ORCIDItem]] entries.
 *
 * @param doi     the DOI the entries are attached to
 * @param authors the ORCID entries associated with `doi`
 */
case class ORCIDElement(
  doi: String,
  authors: List[ORCIDItem]
)
|
2020-05-22 15:15:09 +02:00
|
|
|
/**
 * Utilities that parse ORCID/DOI input strings and build OAF [[Publication]] and
 * [[Author]] instances from [[OrcidDOI]] records.
 *
 * Several methods signal failure by returning `null`; callers must check for it.
 */
object ORCIDToOAF {

  // Local import so the object stays self-contained without touching the file header.
  import scala.util.control.NonFatal

  val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
  val mapper: ObjectMapper = new ObjectMapper()

  /**
   * Tells whether `inputStr` can be parsed by the shared Jackson `mapper`.
   *
   * @param inputStr candidate JSON text; `null` is treated as invalid
   * @return `true` when `mapper.readTree` parses the text without an `IOException`,
   *         `false` when it raises one or when `inputStr` is `null`
   */
  def isJsonValid(inputStr: String): Boolean = {
    import java.io.IOException
    // Guard null explicitly: readTree does not report it as an IOException.
    if (inputStr == null) false
    else
      try {
        mapper.readTree(inputStr)
        true
      } catch {
        case _: IOException => false
      }
  }

  /**
   * Splits a raw input line into its DOI part and its JSON list part.
   *
   * The DOI is taken from the second character up to (excluding) the character right
   * before the first `[`; the list runs from that `[` up to (excluding) the last
   * character of the input.
   * NOTE(review): this matches lines shaped like `(doi,[...])` — confirm against the
   * actual input format.
   *
   * @param input raw line to split; may be `null`
   * @return `(doi, orcidList)` when the list part is valid JSON; `null` when `input` is
   *         `null`, when no `[` is found at index 5 or later, or when the list part
   *         is not valid JSON
   */
  def extractValueFromInputString(input: String): (String, String) = {
    // Index of the opening bracket of the list; -1 also covers a null input.
    val i = if (input == null) -1 else input.indexOf('[')
    if (i < 5) null
    else {
      val orcidList = input.substring(i, input.length - 1)
      val doi = input.substring(1, i - 1)
      if (isJsonValid(orcidList)) (doi, orcidList) else null
    }
  }

  /**
   * Builds a [[Publication]] carrying the DOI (lower-cased, as a `doi` pid) and the
   * authors of the given ORCID record.
   *
   * @param input the ORCID/DOI record to convert; may be `null`
   * @return the populated publication, or `null` when `input` or its DOI is `null`,
   *         when no identifier can be created for the publication, or when building
   *         the authors fails with a non-fatal error (which is logged)
   */
  def convertTOOAF(input: OrcidDOI): Publication = {
    // Without a DOI there is nothing to build the pid from.
    if (input == null || input.getDoi == null) null
    else {
      val doi = input.getDoi
      val pub: Publication = new Publication
      pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
      pub.setDataInfo(generateDataInfo())
      pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub))

      if (pub.getId == null) null
      else
        try {
          // `.toList` instead of `collection.breakOut`, which no longer exists in Scala 2.13.
          val authors: List[Author] = input.getAuthors.asScala
            .map(a => generateAuthor(a.getName, a.getSurname, a.getCreditName, a.getOid))
            .toList
          pub.setAuthor(authors.asJava)
          pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava)
          // NOTE(review): data info was already set before the identifier was created;
          // this second assignment looks redundant — confirm before removing.
          pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
          pub
        } catch {
          // NonFatal lets OutOfMemoryError, InterruptedException, etc. propagate.
          case NonFatal(e) =>
            logger.error(s"ERROR ON GENERATE Publication from $input", e)
            null
        }
    }
  }

  /**
   * Creates the [[DataInfo]] attached to ORCID author pids.
   *
   * @return a data info generated with the argument `"0.91"` (presumably the trust
   *         value — confirm in `DoiBoostMappingUtil`), whose provenance action has
   *         class id `SYSIMPORT_CROSSWALK_ENTITYREGISTRY` and class name `HARVESTED`
   */
  def generateOricPIDDatainfo(): DataInfo = {
    val di = DoiBoostMappingUtil.generateDataInfo("0.91")
    di.getProvenanceaction.setClassid(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)
    di.getProvenanceaction.setClassname(ModelConstants.HARVESTED)
    di
  }

  /**
   * Builds an [[Author]] from its name parts and optional ORCID.
   *
   * @param given    given name, stored as the author's name; may be `null`
   * @param family   family name, stored as the author's surname; may be `null`
   * @param fullName preferred full name; when blank, the full name is built by joining
   *                 the non-blank parts among `given` and `family` with a space
   * @param orcid    ORCID identifier; a pid is attached only when it is not blank
   * @return the populated author
   */
  def generateAuthor(given: String, family: String, fullName: String, orcid: String): Author = {
    val a = new Author
    a.setName(given)
    a.setSurname(family)
    if (StringUtils.isNotBlank(fullName))
      a.setFullname(fullName)
    else {
      // Skip missing parts so the full name never contains the literal text "null".
      val parts = Seq(given, family).filter(part => StringUtils.isNotBlank(part))
      a.setFullname(parts.mkString(" "))
    }
    if (StringUtils.isNotBlank(orcid))
      a.setPid(List(createSP(orcid, ModelConstants.ORCID, ModelConstants.DNET_PID_TYPES, generateOricPIDDatainfo())).asJava)
    a
  }

}
|