continue mapping from crossref to OAF

This commit is contained in:
Sandro La Bruzzo 2020-04-20 13:26:29 +02:00
parent eef60bb9f4
commit 0e45f4d450
2 changed files with 114 additions and 68 deletions

View File

@ -1,15 +1,30 @@
package eu.dnetlib.doiboost.crossref package eu.dnetlib.doiboost.crossref
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.utils.DHPUtils
import org.json4s import org.json4s
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods._ import org.json4s.jackson.JsonMethods._
import org.slf4j.Logger import org.slf4j.Logger
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
class Crossref2Oaf { class Crossref2Oaf {
// String constants shared by the Crossref-to-OAF mapping.
// NOTE(review): MAG, ORCID, CROSSREF, UNPAYWALL, GRID_AC and WIKPEDIA look like
// labels of external data sources, but nothing in the visible code reads them — confirm.
val MAG = "MAG"
val ORCID = "ORCID"
val CROSSREF = "Crossref"
val UNPAYWALL = "UnpayWall"
val GRID_AC = "grid.ac"
// NOTE(review): both the name and the value look like a misspelling of "wikipedia".
// Confirm before renaming: the identifier is public and the value may already be relied upon.
val WIKPEDIA = "wikpedia"
// Namespace prefix that generateIdentifier places after the "50|" / "60|" marker.
val doiBoostNSPREFIX = "doiboost____"
// NOTE(review): not referenced in the visible code; presumably another namespace prefix — confirm.
val OPENAIRE_PREFIX = "openaire____"
// Separator that generateIdentifier puts between the namespace prefix and the MD5 hash.
val SEPARATOR = "::"
// NOTE(review): not referenced in the visible code; presumably a scheme id for languages — confirm.
val DNET_LANGUAGES = "dnet:languages"
val PID_TYPES = "dnet:pid_types" val PID_TYPES = "dnet:pid_types"
val mappingCrossrefType = Map( val mappingCrossrefType = Map(
"book-section" -> "publication", "book-section" -> "publication",
"book" -> "publication", "book" -> "publication",
@ -32,7 +47,7 @@ class Crossref2Oaf {
"report-series" -> "publication", "report-series" -> "publication",
"standard" -> "publication", "standard" -> "publication",
"standard-series" -> "publication", "standard-series" -> "publication",
"posted-content"-> "publication", "posted-content" -> "publication",
"dataset" -> "dataset" "dataset" -> "dataset"
) )
@ -59,12 +74,12 @@ class Crossref2Oaf {
"report-series" -> "0017 Report", "report-series" -> "0017 Report",
"standard" -> "0038 Other literature type", "standard" -> "0038 Other literature type",
"standard-series" -> "0038 Other literature type", "standard-series" -> "0038 Other literature type",
"dataset"-> "0021 Dataset", "dataset" -> "0021 Dataset",
"preprint"-> "0016 Preprint", "preprint" -> "0016 Preprint",
"report"-> "0017 Report" "report" -> "0017 Report"
) )
def convert(input: String, logger:Logger): Result = { def convert(input: String, logger: Logger): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input) lazy val json: json4s.JValue = parse(input)
val objectType = (json \ "type").extractOrElse[String](null) val objectType = (json \ "type").extractOrElse[String](null)
@ -74,38 +89,68 @@ class Crossref2Oaf {
val result = generateItemFromType(objectType, objectSubType) val result = generateItemFromType(objectType, objectSubType)
if (result == null) if (result == null)
return result return result
val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType,mappingCrossrefSubType.getOrElse(objectSubType,"0038 Other literature type")); val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type"));
logger.debug(mappingCrossrefType(objectType))
logger.debug(cOBJCategory)
logger.info(mappingCrossrefType(objectType)) //MAPPING Crossref DOI into PID
logger.info(cOBJCategory) val doi: String = (json \ "DOI").extract[String]
val doi:String = (json \ "DOI").extract[String]
val pid = new StructuredProperty()
pid.setValue(doi)
pid.setQualifier(new Qualifier)
result.setPid(List(createSP(doi,"doi", PID_TYPES)).asJava)
logger.info(doi) result.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
//MAPPING Crossref DOI into OriginalId
result.setOriginalId(List(doi).asJava)
//Set identifier as {50|60} | doiboost____::md5(DOI)
result.setId(generateIdentifier(result, doi))
// Add DataInfo
result.setDataInfo(generateDataInfo())
result.setLastupdatetimestamp((json \"indexed" \"timestamp").extract[Long])
result.setDateofcollection((json \"indexed" \"date-time").extract[String])
result result
} }
/**
  * Builds the identifier of a mapped record from its DOI.
  *
  * The DOI is lower-cased and hashed with `DHPUtils.md5`; the hash is appended to
  * `doiBoostNSPREFIX` and `SEPARATOR`. The leading marker is "60" when `oaf` is a
  * `Dataset` and "50" for every other result.
  *
  * @param oaf the result the identifier is generated for
  * @param doi the DOI of the record
  * @return the identifier in the form `<50|60>|<doiBoostNSPREFIX><SEPARATOR><md5>`
  */
def generateIdentifier(oaf: Result, doi: String): String = {
  // Hash first, exactly as before, so a failure on the DOI surfaces before the type test.
  val hash = DHPUtils.md5(doi.toLowerCase)
  val entityMarker = oaf match {
    case _: Dataset => "60"
    case _ => "50"
  }
  s"${entityMarker}|${doiBoostNSPREFIX}${SEPARATOR}${hash}"
}
/**
  * Creates the `DataInfo` attached to every record produced by this mapping.
  *
  * The flags `deletedbyinference`, `inferred` and `invisible` are all set to false,
  * the trust is the string "0.9" and the provenance action is the qualifier
  * "sysimport:actionset" in the scheme "dnet:provenanceActions".
  *
  * @return a freshly allocated, fully populated `DataInfo`
  */
def generateDataInfo(): DataInfo = {
  val info = new DataInfo
  info.setDeletedbyinference(false)
  info.setInferred(false)
  info.setInvisible(false)
  info.setTrust("0.9")
  val provenance = createQualifier("sysimport:actionset", "dnet:provenanceActions")
  info.setProvenanceaction(provenance)
  info
}
/**
  * Creates a `StructuredProperty` holding `value`, qualified by `classId` / `schemeId`.
  *
  * The qualifier is built by `createQualifier`, which uses `classId` for both the
  * class id and class name, and `schemeId` for both the scheme id and scheme name.
  *
  * @param value    the property value
  * @param classId  the class id (and class name) of the qualifier
  * @param schemeId the scheme id (and scheme name) of the qualifier
  * @return the populated structured property
  */
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
  val property = new StructuredProperty
  val qualifier = createQualifier(classId, schemeId)
  property.setQualifier(qualifier)
  property.setValue(value)
  property
}
/**
  * Creates a `Qualifier` whose class id and class name are both `cls`, and whose
  * scheme id and scheme name are both `sch`.
  *
  * @param cls the value used for the class id and the class name
  * @param sch the value used for the scheme id and the scheme name
  * @return the populated qualifier
  */
def createQualifier(cls: String, sch: String): Qualifier = {
  val qualifier = new Qualifier
  // Id and name are deliberately set to the same string for both class and scheme.
  qualifier.setClassid(cls)
  qualifier.setClassname(cls)
  qualifier.setSchemeid(sch)
  qualifier.setSchemename(sch)
  qualifier
}
def generateItemFromType (objectType:String, objectSubType:String):Result = {
if (mappingCrossrefType.contains(objectType)){ def generateItemFromType(objectType: String, objectSubType: String): Result = {
if (mappingCrossrefType.contains(objectType)) {
if (mappingCrossrefType(objectType).equalsIgnoreCase("publication")) if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
return new Publication() return new Publication()
if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset")) if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))

View File

@ -6,5 +6,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout. # A1 uses PatternLayout.
log4j.logger.org = ERROR log4j.logger.org = ERROR
log4j.logger.eu.dnetlib = DEBUG
log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n