From 0e45f4d450ce120fc26c729618cf5498b7ea5e25 Mon Sep 17 00:00:00 2001 From: sandro Date: Mon, 20 Apr 2020 13:26:29 +0200 Subject: [PATCH] continue mapping from crossref to OAF --- .../doiboost/crossref/Crossref2Oaf.scala | 181 +++++++++++------- .../src/test/resources/log4j.properties | 1 + 2 files changed, 114 insertions(+), 68 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 8a1144a1bb..83259f5ec1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -1,70 +1,85 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.utils.DHPUtils import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods._ import org.slf4j.Logger import scala.collection.JavaConverters._ + class Crossref2Oaf { +//STATIC STRING + val MAG = "MAG" + val ORCID = "ORCID" + val CROSSREF = "Crossref" + val UNPAYWALL = "UnpayWall" + val GRID_AC = "grid.ac" + val WIKPEDIA = "wikpedia" + val doiBoostNSPREFIX = "doiboost____" + val OPENAIRE_PREFIX = "openaire____" + val SEPARATOR = "::" + val DNET_LANGUAGES = "dnet:languages" val PID_TYPES = "dnet:pid_types" + + val mappingCrossrefType = Map( - "book-section" -> "publication", - "book" -> "publication", - "book-chapter" -> "publication", - "book-part" -> "publication", - "book-series" -> "publication", - "book-set" -> "publication", - "book-track" -> "publication", - "edited-book" -> "publication", - "reference-book" -> "publication", - "monograph" -> "publication", - "journal-article" -> "publication", - "dissertation" -> "publication", - "other" -> "publication", - "peer-review" -> "publication", - "proceedings" -> "publication", - "proceedings-article" -> "publication", - "reference-entry" -> "publication", - "report" -> "publication", - "report-series" -> "publication", - "standard" -> "publication", - "standard-series" -> "publication", - "posted-content"-> "publication", - "dataset" -> "dataset" + "book-section" -> "publication", + "book" -> "publication", + "book-chapter" -> "publication", + "book-part" -> "publication", + "book-series" -> "publication", + "book-set" -> "publication", + "book-track" -> "publication", + "edited-book" -> "publication", + "reference-book" -> "publication", + "monograph" -> "publication", + "journal-article" -> "publication", + "dissertation" -> "publication", + "other" -> "publication", + "peer-review" -> "publication", + "proceedings" -> "publication", + "proceedings-article" -> "publication", + "reference-entry" -> "publication", + "report" -> "publication", + "report-series" -> "publication", + "standard" -> "publication", + "standard-series" -> "publication", + "posted-content" -> "publication", + "dataset" -> "dataset" ) val mappingCrossrefSubType = Map( - "book-section" -> "0013 Part of book or chapter of book", - "book" -> "0002 Book", - "book-chapter" -> "0013 Part of book or chapter of book", - "book-part" -> "0013 Part of book or chapter of book", - "book-series" -> "0002 Book", - "book-set" -> "0002 Book", - "book-track" -> "0002 Book", - "edited-book" -> "0002 Book", - "reference-book" -> "0002 Book", - "monograph" -> "0002 Book", - "journal-article" -> "0001 Article", - "dissertation" -> "0006 Doctoral thesis", - "other" -> "0038 Other literature type", - "peer-review" -> "0015 Review", - "proceedings" -> "0004 Conference object", - "proceedings-article" -> "0004 Conference object", - "reference-entry" -> "0013 Part of book or chapter of book", - "report" -> "0017 Report", - "report-series" -> "0017 Report", - "standard" -> "0038 Other literature type", - "standard-series" -> "0038 Other literature type", - "dataset"-> "0021 Dataset", - "preprint"-> "0016 Preprint", - "report"-> "0017 Report" + "book-section" -> "0013 Part of book or chapter of book", + "book" -> "0002 Book", + "book-chapter" -> "0013 Part of book or chapter of book", + "book-part" -> "0013 Part of book or chapter of book", + "book-series" -> "0002 Book", + "book-set" -> "0002 Book", + "book-track" -> "0002 Book", + "edited-book" -> "0002 Book", + "reference-book" -> "0002 Book", + "monograph" -> "0002 Book", + "journal-article" -> "0001 Article", + "dissertation" -> "0006 Doctoral thesis", + "other" -> "0038 Other literature type", + "peer-review" -> "0015 Review", + "proceedings" -> "0004 Conference object", + "proceedings-article" -> "0004 Conference object", + "reference-entry" -> "0013 Part of book or chapter of book", + "report" -> "0017 Report", + "report-series" -> "0017 Report", + "standard" -> "0038 Other literature type", + "standard-series" -> "0038 Other literature type", + "dataset" -> "0021 Dataset", + "preprint" -> "0016 Preprint", + "report" -> "0017 Report" ) - def convert(input: String, logger:Logger): Result = { + def convert(input: String, logger: Logger): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val objectType = (json \ "type").extractOrElse[String](null) @@ -74,42 +89,72 @@ class Crossref2Oaf { val result = generateItemFromType(objectType, objectSubType) if (result == null) return result - val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType,mappingCrossrefSubType.getOrElse(objectSubType,"0038 Other literature type")); + val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")); + logger.debug(mappingCrossrefType(objectType)) + logger.debug(cOBJCategory) - logger.info(mappingCrossrefType(objectType)) - logger.info(cOBJCategory) - val doi:String = (json \ "DOI").extract[String] - val pid = new StructuredProperty() - pid.setValue(doi) - pid.setQualifier(new Qualifier) - result.setPid(List(createSP(doi,"doi", PID_TYPES)).asJava) + //MAPPING Crossref DOI into PID + val doi: String = (json \ "DOI").extract[String] - logger.info(doi) + result.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava) + //MAPPING Crossref DOI into OriginalId + result.setOriginalId(List(doi).asJava) + //Set identifier as {50|60} | doiboost____::md5(DOI) + result.setId(generateIdentifier(result, doi)) + + // Add DataInfo + result.setDataInfo(generateDataInfo()) + + result.setLastupdatetimestamp((json \"indexed" \"timestamp").extract[Long]) + + result.setDateofcollection((json \"indexed" \"date-time").extract[String]) result } - def createSP(value:String, classId:String, schemeId:String ):StructuredProperty = { + def generateIdentifier(oaf: Result, doi:String): String = { + val id = DHPUtils.md5(doi.toLowerCase) + if (oaf.isInstanceOf[Dataset]) + return s"60|${doiBoostNSPREFIX}${SEPARATOR}${id}" + s"50|${doiBoostNSPREFIX}${SEPARATOR}${id}" + } + + def generateDataInfo(): DataInfo = { + val di =new DataInfo + di.setDeletedbyinference(false) + di.setInferred(false) + di.setInvisible(false) + di.setTrust("0.9") + di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions")) + di + } + + + def createSP(value: String, classId: String, schemeId: String): StructuredProperty = { val sp = new StructuredProperty - val q = new Qualifier - q.setClassid(classId) - q.setClassname(classId) - q.setSchemeid(schemeId) - q.setSchemename(schemeId ) + sp.setQualifier(createQualifier(classId, schemeId)) sp.setValue(value) - sp.setQualifier(q) sp } + def createQualifier(cls:String, sch:String):Qualifier = { + val q = new Qualifier + q.setClassid(cls) + q.setClassname(cls) + q.setSchemeid(sch) + q.setSchemename(sch) + q + } - def generateItemFromType (objectType:String, objectSubType:String):Result = { - if (mappingCrossrefType.contains(objectType)){ + + def generateItemFromType(objectType: String, objectSubType: String): Result = { + if (mappingCrossrefType.contains(objectType)) { if (mappingCrossrefType(objectType).equalsIgnoreCase("publication")) - return new Publication() + return new Publication() if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset")) - return new Dataset() + return new Dataset() } null } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties index 0fb67b578b..20f56e38dd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties +++ b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties @@ -6,5 +6,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.logger.org = ERROR +log4j.logger.eu.dnetlib = DEBUG log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file