From 8af8b2ea27d63141367056da19b458266d79aa5c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Feb 2023 17:20:13 +0100 Subject: [PATCH] Now Crossref mapping and dhp-aggregation compile --- .../dhp/schema/common/ModelConstants.java | 36 +- .../schema/oaf/utils/CleaningFunctions.java | 29 +- .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/schema/oaf/utils/OafMapperUtils.java | 8 +- .../dhp/common/vocabulary/VocabularyTest.java | 2 +- .../oaf/utils/IdentifierFactoryTest.java | 23 +- .../dhp/crossref/CrossrefUtility.scala | 492 +++++++++++++----- .../DataciteToOAFTransformation.scala | 1 - 8 files changed, 447 insertions(+), 146 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index dc38f218f..f10fda99d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -4,12 +4,14 @@ package eu.dnetlib.dhp.schema.common; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class ModelConstants { private ModelConstants() { } + public static final String DOI = "doi"; public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; @@ -219,7 +221,7 @@ public class ModelConstants { public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier( "alternative title", "alternative title", DNET_DATACITE_TITLE); - private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); + public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); public static final AccessRight OPEN_ACCESS_RIGHT() { @@ -230,6 +232,38 @@ public class ModelConstants { return result; } + public static final AccessRight RESTRICTED_ACCESS_RIGHT() { + final AccessRight result = new AccessRight(); + result.setClassid("RESTRICTED"); + result.setClassname("Restricted"); + result.setSchemeid(ModelConstants.DNET_ACCESS_MODES); + return result; + } + + public static final AccessRight UNKNOWN_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES + ); + } + + public static final AccessRight EMBARGOED_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ACCESS_RIGHT_EMBARGO, + ACCESS_RIGHT_EMBARGO, + DNET_ACCESS_MODES + ); + } + + public static final AccessRight CLOSED_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ACCESS_RIGHT_CLOSED, + "Closed Access", + ModelConstants.DNET_ACCESS_MODES + ); + } + private static Qualifier qualifier( final String classid, final String classname, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index c0ef339bd..aaae0fe0a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -6,13 +6,17 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import lombok.val; public class CleaningFunctions { public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)"; + + private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)"; public static final String DOI_PREFIX = "10."; public static final Set PID_BLACKLIST = new HashSet<>(); @@ -58,6 +62,27 @@ public class CleaningFunctions { return pid; } + /** + * This utility was moved from DOIBoost, + * it implements a better cleaning of DOI. + * In case of wrong DOI it raises an illegalArgumentException + * @param input DOI + * @return normalized DOI + */ + private static String normalizeDOI(final String input) { + if (input == null) + throw new IllegalArgumentException("PID value cannot be empty"); + final String replaced = input + .replaceAll(ALL_SPACES_REGEX, "") + .toLowerCase() + .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + if (StringUtils.isEmpty(replaced.trim())) + throw new IllegalArgumentException("PID value normalized return empty string"); + if (!replaced.contains("10.")) + throw new IllegalArgumentException("DOI Must starts with 10."); + return replaced.substring(replaced.indexOf("10.")); + } + public static String normalizePidValue(String pidType, String pidValue) { String value = Optional .ofNullable(pidValue) @@ -67,8 +92,8 @@ public class CleaningFunctions { switch (pidType) { // TODO add cleaning for more PID types as needed - case "doi": - return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + case ModelConstants.DOI: + return normalizeDOI(value.toLowerCase()); } return value; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index fff9ac885..b70250f26 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -495,7 +495,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { private static AccessRight accessRight(String classid, String classname, String scheme) { return OafMapperUtils .accessRight( - classid, classname, scheme, scheme); + classid, classname, scheme); } private static Qualifier qualifier(String classid, String classname, String scheme) { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 723254bab..5c7e237fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -55,19 +55,19 @@ public class OafMapperUtils { return qualifier(UNKNOWN, "Unknown", schemeid); } + + public static AccessRight accessRight( final String classid, final String classname, - final String schemeid, - final String schemename) { - return accessRight(classid, classname, schemeid, schemename, null); + final String schemeid) { + return accessRight(classid, classname, schemeid, null); } public static AccessRight accessRight( final String classid, final String classname, final String schemeid, - final String schemename, final OpenAccessRoute openAccessRoute) { final AccessRight accessRight = new AccessRight(); accessRight.setClassid(classid); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java index 0650dc53b..eb4a092cf 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java @@ -67,7 +67,7 @@ public class VocabularyTest { if (t1 == null) { System.err.println(s1 + " Missing"); } else { - System.out.println("syn=" + s1 + " term = " + t1.getClassid()); + System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname()); System.out .println( diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 08339c3a1..a26e1c83d 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.schema.oaf.utils; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -81,4 +81,23 @@ class IdentifierFactoryTest { assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5)); } + @Test + void testNormalizeDOI() throws Exception { + + final String doi = "10.1042/BCJ20160876"; + + assertEquals(CleaningFunctions.normalizePidValue("doi", doi), doi.toLowerCase()); + final String doi2 = "0.1042/BCJ20160876"; + assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2)); + + final String doi3 = "https://doi.org/0.1042/BCJ20160876"; + assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3)); + + final String doi4 = "https://doi.org/10.1042/BCJ20160876"; + assertEquals(CleaningFunctions.normalizePidValue("doi", doi4), "10.1042/BCJ20160876".toLowerCase()); + + final String doi5 = "https://doi.org/10.1042/ BCJ20160876"; + assertEquals(CleaningFunctions.normalizePidValue("doi", doi5), "10.1042/BCJ20160876".toLowerCase()); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala index 4d81b4858..18299cb87 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala @@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ +import eu.dnetlib.dhp.schema.oaf.utils._ import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString, JValue} +import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} +import java.time.LocalDate +import java.time.format.DateTimeFormatter import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer +import scala.util.matching.Regex case class CrossrefDT(doi: String, json: String, timestamp: Long) {} +case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){} + +case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} + object CrossrefUtility { - val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" - val DOI_PREFIX = "10." val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME) - def normalizeDoi(input: String): String = { - if (input == null) - return null - val replaced = input - .replaceAll("(?:\\n|\\r|\\t|\\s)", "") - .toLowerCase - .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) - if (replaced == null || replaced.trim.isEmpty) - return null - if (replaced.indexOf("10.") < 0) - return null - val ret = replaced.substring(replaced.indexOf("10.")) - if (!ret.startsWith(DOI_PREFIX)) - return null - ret - } + val logger: Logger = LoggerFactory.getLogger(getClass) - def extractDate(dt: String, datePart: List[List[Int]]): String = { - if (StringUtils.isNotBlank(dt)) - return GraphCleaningFunctions.cleanDate(dt) - if (datePart != null && datePart.size == 1) { - val res = datePart.head - if (res.size == 3) { - val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" - if (dp.length == 10) { - return GraphCleaningFunctions.cleanDate(dp) - } - } else if (res.size == 2) { - val dp = f"${res.head}-${res(1)}%02d-01" - return GraphCleaningFunctions.cleanDate(dp) - } else if (res.size == 1) { - return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01") - } - } - null - - } - - private def generateDate( - dt: String, - datePart: List[List[Int]], - classId: String, - schemeId: String - ): StructuredProperty = { - val dp = extractDate(dt, datePart) - if (StringUtils.isNotBlank(dp)) - structuredProperty(dp, classId, classId, schemeId) - else - null - } - - private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = { - val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) - if (term != null) { - val resourceType = - vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname - - resourceType match { - case "publication" => (new Publication, resourceType) - case "dataset" => (new Dataset, resourceType) - case "software" => (new Software, resourceType) - case "otherresearchproduct" => (new OtherResearchProduct, resourceType) - } - } else - null - } def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -103,7 +47,8 @@ object CrossrefUtility { val result = resultWithType._1 val cOBJCategory = resultWithType._2 - mappingResult(result, json, cOBJCategory) + val className = resultWithType._3 + mappingResult(result, json, cOBJCategory, className) if (result == null || result.getId == null) return List() @@ -111,29 +56,144 @@ object CrossrefUtility { (json \ "funder").extractOrElse[List[mappingFunder]](List()) if (funderList.nonEmpty) { - resultList = resultList ::: mappingFunderToRelations( - funderList, - result.getId, - createCrossrefCollectedFrom(), - result.getDataInfo, - result.getLastupdatetimestamp - ) + resultList = resultList ::: mappingFunderToRelations(funderList, result ) } - - result match { - case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) - } - resultList = resultList ::: List(result) resultList } - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { + private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = { + val r = new Relation + r.setSource(sourceId) + r.setTarget(targetId) + r.setRelType(ModelConstants.RESULT_PROJECT) + r.setRelClass(relClass) + r.setSubRelType(ModelConstants.OUTCOME) + r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava) + r + } + + + private def generateSimpleRelationFromAward( + funder: mappingFunder, + nsPrefix: String, + extractField: String => String, + source:Result + ): List[Relation] = { + if (funder.award.isDefined && funder.award.get.nonEmpty) + funder.award.get + .map(extractField) + .filter(a => a != null && a.nonEmpty) + .map(award => { + val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true) + createRelation(targetId, source.getId, ModelConstants.PRODUCES) + }) + else List() + } + + private def extractECAward(award: String): String = { + val awardECRegex: Regex = "[0-9]{4,9}".r + if (awardECRegex.findAllIn(award).hasNext) + return awardECRegex.findAllIn(award).max + null + } + + private def snsfRule(award: String): String = { + val tmp1 = StringUtils.substringAfter(award, "_") + val tmp2 = StringUtils.substringBefore(tmp1, "/") + tmp2 + + } + + private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = { + var relList:List[Relation] = List() + + if (funders != null) + funders.foreach(funder => { + if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + funder.DOI.get match { + case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" | + "10.13039/100010665" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + case "10.13039/501100000781" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result) + case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result) + case "10.13039/501100001602" => + relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result) + case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result) + case "10.13039/501100000038" => + val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100000155" => + val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100000024" => + val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result) + case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result) + case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result) + case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result) + case "10.13039/501100003407" => + relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100006588" | "10.13039/501100004488" => + relList =relList ::: generateSimpleRelationFromAward( + funder, + "irb_hr______", + a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result + ) + case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result) + case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result) + case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result) + case "10.13039/100004440" => + relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case _ => logger.debug("no match for " + funder.DOI.get) + + } + + } else { + funder.name match { + case "European Union’s Horizon 2020 research and innovation program" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "European Union's" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + case "The French National Research Agency (ANR)" | "The French National Research Agency" => + relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + case "CONICYT, Programa de Formación de Capital Humano Avanzado" => + relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result) + case "Wellcome Trust Masters Fellowship" => + relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case _ => logger.debug("no match for " + funder.name) + + } + } + + }) + relList + + } + + + + + + private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID - val doi: String = normalizeDoi((json \ "DOI").extract[String]) + val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String]) result.setPid( List( @@ -176,9 +236,7 @@ object CrossrefUtility { } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val subtitles = for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( - title, - ModelConstants.SUBTITLE_QUALIFIER - ) + title, ModelConstants.SUBTITLE_QUALIFIER) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION @@ -244,21 +302,28 @@ object CrossrefUtility { if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava - ) + subjectList.map(s => + OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null) + ).asJava) } //Mapping Author - val authorList: List[mappingAuthor] = - (json \ "author").extractOrElse[List[mappingAuthor]](List()) + val authorList:List[CrossrefAuthor] = + for { + JObject(author) <- json \ "author" + JField("ORCID", JString(orcid)) <- author + JField("given", JString(givenName)) <- author + JField("family", JString(familyName)) <- author + JField("sequence", JString(sequence)) <- author + } yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0) - val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => - a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") - ) - - result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) - }.asJava) + result.setAuthor(authorList.sortWith((a,b) =>{ + if (a.sequence.equalsIgnoreCase("first")) + true + else if (b.sequence.equalsIgnoreCase("first")) + false + else a.familyName< b.familyName + }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava) // Mapping instance val instance = new Instance() @@ -266,8 +331,8 @@ object CrossrefUtility { JObject(license) <- json \ "license" JField("URL", JString(lic)) <- license JField("content-version", JString(content_version)) <- license - } yield (asField(lic), content_version) - val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + } yield (new License(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl)) if (l.nonEmpty) { if (l exists (d => d._2.equals("vor"))) { for (d <- l) { @@ -290,66 +355,225 @@ object CrossrefUtility { OafMapperUtils.qualifier( "0001", "peerReviewed", - ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS ) ) } - instance.setAccessright( - decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) - ) + if (instance.getLicense!= null) + instance.setAccessright( + decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance) + ) instance.setInstancetype( OafMapperUtils.qualifier( - cobjCategory.substring(0, 4), - cobjCategory.substring(5), - ModelConstants.DNET_PUBLICATION_RESOURCE, + cobjCategory, + className, ModelConstants.DNET_PUBLICATION_RESOURCE ) ) result.setResourcetype( OafMapperUtils.qualifier( - cobjCategory.substring(0, 4), - cobjCategory.substring(5), - ModelConstants.DNET_PUBLICATION_RESOURCE, + cobjCategory, + className, ModelConstants.DNET_PUBLICATION_RESOURCE ) ) - instance.setCollectedfrom(createCrossrefCollectedFrom()) + instance.setCollectedfrom(CROSSREF_COLLECTED_FROM) if (StringUtils.isNotBlank(issuedDate)) { - instance.setDateofacceptance(asField(issuedDate)) + instance.setDateofacceptance(issuedDate) } else { - instance.setDateofacceptance(asField(createdDate.getValue)) + instance.setDateofacceptance(createdDate.getValue) } val s: List[String] = List("https://doi.org/" + doi) - // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct - // if (links.nonEmpty) { - // instance.setUrl(links.asJava) - // } if (s.nonEmpty) { instance.setUrl(s.asJava) } + val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct + //Mapping book + if (className.toLowerCase.contains("book")) { + val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn + if (ISBN.nonEmpty && containerTitles.nonEmpty) { + val source = s"${containerTitles.head} ISBN: ${ISBN.head}" + if (result.getSource != null) { + val l: List[String] = result.getSource.asScala.toList ::: List(source) + result.setSource(l.asJava) + } else + result.setSource(List(source).asJava) + } + } else { + // Mapping Journal + val issnInfos = for { + JObject(issn_type) <- json \ "issn-type" + JField("type", JString(tp)) <- issn_type + JField("value", JString(vl)) <- issn_type + } yield Tuple2(tp, vl) + + val volume = (json \ "volume").extractOrElse[String](null) + if (containerTitles.nonEmpty) { + val journal = new Journal + journal.setName(containerTitles.head) + if (issnInfos.nonEmpty) { + + issnInfos.foreach(tp => { + tp._1 match { + case "electronic" => journal.setIssnOnline(tp._2) + case "print" => journal.setIssnPrinted(tp._2) + } + }) + } + journal.setVol(volume) + val page = (json \ "page").extractOrElse[String](null) + if (page != null) { + val pp = page.split("-") + if (pp.nonEmpty) + journal.setSp(pp.head) + if (pp.size > 1) + journal.setEp(pp(1)) + } + result.setJournal(journal) + } + } + result.setInstance(List(instance).asJava) - - //IMPORTANT - //The old method result.setId(generateIdentifier(result, doi)) - //is replaced using IdentifierFactory, but the old identifier - //is preserved among the originalId(s) - val oldId = generateIdentifier(result, doi) - result.setId(oldId) - - val newId = IdentifierFactory.createDOIBoostIdentifier(result) - if (!oldId.equalsIgnoreCase(newId)) { - result.getOriginalId.add(oldId) - } - result.setId(newId) - - if (result.getId == null) + result.setId("ID") + result.setId(IdentifierFactory.createIdentifier(result, true)) + if (result.getId == null || "ID".equalsIgnoreCase(result.getId)) null else result } + def decideAccessRight(license: String, date: String): AccessRight = { + if (license == null || license.isEmpty) { + //Default value Unknown + return ModelConstants.UNKNOWN_ACCESS_RIGHT(); + } + //CC licenses + if ( + license.startsWith("cc") || + license.startsWith("http://creativecommons.org/licenses") || + license.startsWith("https://creativecommons.org/licenses") || + + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") + ) { + + val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + + //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) + if ( + license.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) { + val now = java.time.LocalDate.now + + try { + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return ModelConstants.EMBARGOED_ACCESS_RIGHT() + } + } catch { + case _: Exception => { + try { + val pub_date = + LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return ModelConstants.EMBARGOED_ACCESS_RIGHT() + } + } catch { + case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT() + } + } + + } + + } + + ModelConstants.CLOSED_ACCESS_RIGHT() + } + + + private def extractDate(dt: String, datePart: List[List[Int]]): String = { + if (StringUtils.isNotBlank(dt)) + return GraphCleaningFunctions.cleanDate(dt) + if (datePart != null && datePart.size == 1) { + val res = datePart.head + if (res.size == 3) { + val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" + if (dp.length == 10) { + return GraphCleaningFunctions.cleanDate(dp) + } + } else if (res.size == 2) { + val dp = f"${res.head}-${res(1)}%02d-01" + return GraphCleaningFunctions.cleanDate(dp) + } else if (res.size == 1) { + return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01") + } + } + null + } + + private def generateDate( + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { + val dp = extractDate(dt, datePart) + if (StringUtils.isNotBlank(dp)) + structuredProperty(dp, classId, classId, schemeId) + else + null + } + + private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = { + val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) + if (term != null) { + val resourceType = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname + + resourceType match { + case "publication" => (new Publication, resourceType, term.getClassname) + case "dataset" => (new Dataset, resourceType, term.getClassname) + case "software" => (new Software, resourceType, term.getClassname) + case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname) + } + } else + null + } + + private def generateAuthor(ca: CrossrefAuthor): Author = { + val a = new Author + a.setName(ca.givenName) + a.setSurname(ca.familyName) + a.setFullname(s"${ca.familyName}, ${ca.givenName}") + a.setRank(ca.rank + 1) + if (StringUtils.isNotBlank(ca.ORCID)) + a.setPid( + List( + OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null) + ).asJava + ) + a + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index afb687b37..2696b5252 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -553,7 +553,6 @@ object DataciteToOAFTransformation { OafMapperUtils.accessRight( ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, - ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES )