From 8f777af827b0125f60448efe8b96a7f34a45aea5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Feb 2023 10:30:34 +0100 Subject: [PATCH 1/3] increased version number of dhp-code-style --- dhp-build/dhp-code-style/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 5a86efe17..dfc3c75d9 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.5-SNAPSHOT + 2.0.0-SNAPSHOT jar From 990e3e2f60d1131bf6998c850783b2d7342c9d08 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Feb 2023 10:32:17 +0100 Subject: [PATCH 2/3] code refactor --- .../dhp/schema/common/ModelConstants.java | 8 +- .../dhp/schema/oaf/common/ModelSupport.java | 3 +- .../dhp/schema/oaf/utils/MergeUtils.java | 32 +++--- .../dhp/schema/oaf/utils/OafMapperUtils.java | 3 +- .../dhp/common/vocabulary/VocabularyTest.java | 107 +++++++++--------- .../dhp/crossref/CrossrefUtility.scala | 68 ++++++----- .../crossref/GenerateCrossrefDataset.scala | 18 +-- .../DataciteToOAFTransformation.scala | 10 +- .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 11 +- 9 files changed, 128 insertions(+), 132 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index d2ef9fa7b..dc38f218f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -7,7 +7,8 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; public class ModelConstants { - private ModelConstants() {} + private ModelConstants() { + } public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; @@ -80,7 +81,6 @@ public class ModelConstants { public static final String PROVENANCE_DEDUP = "sysimport:dedup"; public static final String PROVENANCE_ENRICH = "sysimport:enrich"; - public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier( SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS); @@ -127,8 +127,6 @@ public class ModelConstants { public static final String IS_REQUIRED_BY = "IsRequiredBy"; public static final String REQUIRES = "Requires"; - - public static final String CITATION = "citation"; // subreltype public static final String CITES = "Cites"; public static final String IS_CITED_BY = "IsCitedBy"; @@ -219,7 +217,7 @@ public class ModelConstants { "main title", "main title", DNET_DATACITE_TITLE); public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier( - "alternative title", "alternative title", DNET_DATACITE_TITLE); + "alternative title", "alternative title", DNET_DATACITE_TITLE); private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java index 3ea391bd4..3ee3ed5a1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java @@ -209,7 +209,8 @@ public class ModelSupport { return idPrefixMap.get(clazz); } - public static Boolean sameClass(X left, Y right, Class superClazz) { + public static Boolean sameClass(X left, Y right, + Class superClazz) { return isSubClass(left, superClazz) && isSubClass(right, superClazz); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index cc6e10d81..ae275681d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -31,10 +31,10 @@ public class MergeUtils { return mergeRelation(left, right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -53,10 +53,10 @@ public class MergeUtils { return mergeProject(left, right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -110,8 +110,8 @@ public class MergeUtils { mergedEntity.setLastupdatetimestamp(enrich.getLastupdatetimestamp()); } else if (mergedEntity.getLastupdatetimestamp() != null && enrich.getLastupdatetimestamp() != null) { mergedEntity - .setLastupdatetimestamp( - Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp())); + .setLastupdatetimestamp( + Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp())); } mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid())); @@ -138,7 +138,7 @@ public class MergeUtils { checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal"); checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal"); checkArgument( - Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); + Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal"); original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance())); @@ -148,10 +148,10 @@ public class MergeUtils { original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate())); } catch (ParseException e) { throw new IllegalArgumentException(String - .format( - "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(), - original.getTarget(), - original.getValidationDate())); + .format( + "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(), + original.getTarget(), + original.getValidationDate())); } return (T) original; @@ -370,7 +370,7 @@ public class MergeUtils { private static T mergePublication(T original, T enrich) { - //add publication specific fields. + // add publication specific fields. mergeEntityDataInfo(original, enrich); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index ff16cf4d8..723254bab 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -363,7 +363,8 @@ public class OafMapperUtils { final Entity entity, final String validationDate) { - final List provenance = getProvenance(entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo())); + final List provenance = getProvenance( + entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo())); return getRelation( source, target, relType, subRelType, relClass, provenance, validationDate, null); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java index 6529d43da..0650dc53b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java @@ -1,8 +1,13 @@ + package eu.dnetlib.dhp.common.vocabulary; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -12,73 +17,63 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -import static org.mockito.Mockito.lenient; - +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class VocabularyTest { + @Mock + protected ISLookUpService isLookUpService; - @Mock - protected ISLookUpService isLookUpService; + protected VocabularyGroup vocabularies; - protected VocabularyGroup vocabularies; + @BeforeEach + public void setUpVocabulary() throws ISLookUpException, IOException { - @BeforeEach - public void setUpVocabulary() throws ISLookUpException, IOException { + lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); - lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); + lenient() + .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) + .thenReturn(synonyms()); + vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService); + } - lenient() - .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) - .thenReturn(synonyms()); - vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService); - } + private static List vocs() throws IOException { + return IOUtils + .readLines( + Objects + .requireNonNull( + VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt"))); + } - private static List vocs() throws IOException { - return IOUtils - .readLines( - Objects - .requireNonNull( - VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt"))); - } + private static List synonyms() throws IOException { + return IOUtils + .readLines( + Objects + .requireNonNull( + VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt"))); + } - private static List synonyms() throws IOException { - return IOUtils - .readLines( - Objects - .requireNonNull( - VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt"))); - } + @Test + void testVocabularyMatch() throws Exception { + final String s = IOUtils.toString(this.getClass().getResourceAsStream("terms")); + for (String s1 : s.split("\n")) { - @Test - void testVocabularyMatch () throws Exception{ - final String s= IOUtils.toString(this.getClass().getResourceAsStream("terms")); + final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1); - for (String s1 : s.split("\n")) { + if (t1 == null) { + System.err.println(s1 + " Missing"); + } else { + System.out.println("syn=" + s1 + " term = " + t1.getClassid()); - final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1); + System.out + .println( + vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname()); + } + } - if (t1 == null) { - System.err.println(s1+ " Missing"); - } - else { - System.out.println("syn=" + s1 + " term = " + t1.getClassid()); - - - System.out.println(vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname()); - } - } - - - - - - } + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala index 28ea64c9b..4d81b4858 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala @@ -13,8 +13,8 @@ import org.json4s.jackson.JsonMethods.parse import scala.collection.JavaConverters._ - case class CrossrefDT(doi: String, json: String, timestamp: Long) {} + object CrossrefUtility { val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" val DOI_PREFIX = "10." @@ -37,7 +37,6 @@ object CrossrefUtility { ret } - def extractDate(dt: String, datePart: List[List[Int]]): String = { if (StringUtils.isNotBlank(dt)) return GraphCleaningFunctions.cleanDate(dt) @@ -60,36 +59,35 @@ object CrossrefUtility { } private def generateDate( - dt: String, - datePart: List[List[Int]], - classId: String, - schemeId: String - ): StructuredProperty = { + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { val dp = extractDate(dt, datePart) if (StringUtils.isNotBlank(dp)) - structuredProperty(dp, classId, classId,schemeId) + structuredProperty(dp, classId, classId, schemeId) else null } - - private def generateItemFromType(objectType: String, vocabularies:VocabularyGroup): (Result, String) = { + private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = { val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) if (term != null) { - val resourceType = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname + val resourceType = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname resourceType match { - case "publication" =>(new Publication, resourceType) - case "dataset" =>(new Dataset, resourceType) - case "software" => (new Software, resourceType) - case "otherresearchproduct" =>(new OtherResearchProduct, resourceType) + case "publication" => (new Publication, resourceType) + case "dataset" => (new Dataset, resourceType) + case "software" => (new Software, resourceType) + case "otherresearchproduct" => (new OtherResearchProduct, resourceType) } } else null } - - def convert(input: String, vocabularies:VocabularyGroup): List[Oaf] = { + def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -124,14 +122,13 @@ object CrossrefUtility { result match { case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) + case dataset: Dataset => convertDataset(dataset) } resultList = resultList ::: List(result) resultList } - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -140,8 +137,9 @@ object CrossrefUtility { result.setPid( List( - structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES) - ).asJava) + structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES) + ).asJava + ) //MAPPING Crossref DOI into OriginalId //and Other Original Identifier of dataset like clinical-trial-number @@ -149,11 +147,10 @@ object CrossrefUtility { val alternativeIds: List[String] = for (JString(ids) <- json \ "alternative-id") yield ids val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi) - result.setOriginalId(tmp.filter(id => id != null).asJava) // Add DataInfo - result.setDataInfo(dataInfo(false, false,0.9F,null, false,ModelConstants.REPOSITORY_PROVENANCE_ACTIONS)) + result.setDataInfo(dataInfo(false, false, 0.9f, null, false, ModelConstants.REPOSITORY_PROVENANCE_ACTIONS)) result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long]) result.setDateofcollection((json \ "indexed" \ "date-time").extract[String]) @@ -167,23 +164,26 @@ object CrossrefUtility { // TITLE val mainTitles = - for {JString(title) <- json \ "title" if title.nonEmpty} - yield - structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER) + for { JString(title) <- json \ "title" if title.nonEmpty } yield structuredProperty( + title, + ModelConstants.MAIN_TITLE_QUALIFIER + ) val originalTitles = for { JString(title) <- json \ "original-title" if title.nonEmpty } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val shortTitles = for { JString(title) <- json \ "short-title" if title.nonEmpty - } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) + } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val subtitles = - for {JString(title) <- json \ "subtitle" if title.nonEmpty} - yield structuredProperty(title, ModelConstants.SUBTITLE_QUALIFIER) + for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( + title, + ModelConstants.SUBTITLE_QUALIFIER + ) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION val descriptionList = - for {JString(description) <- json \ "abstract"} yield description + for { JString(description) <- json \ "abstract" } yield description result.setDescription(descriptionList.asJava) // Source @@ -242,11 +242,9 @@ object CrossrefUtility { //Mapping Subject val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List()) - - if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava + subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava ) } @@ -265,8 +263,8 @@ object CrossrefUtility { // Mapping instance val instance = new Instance() val license = for { - JObject(license) <- json \ "license" - JField("URL", JString(lic)) <- license + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license JField("content-version", JString(content_version)) <- license } yield (asField(lic), content_version) val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala index fd4bcd37d..0d45d1c83 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala @@ -3,20 +3,20 @@ package eu.dnetlib.dhp.crossref import eu.dnetlib.dhp.application.AbstractScalaApplication import org.slf4j.{Logger, LoggerFactory} -class GenerateCrossrefDataset (propertyPath: String, args: Array[String], log: Logger) - extends AbstractScalaApplication(propertyPath, args, log: Logger) { +class GenerateCrossrefDataset(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + /** Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ + * where the whole logic of the spark node is defined + */ override def run(): Unit = ??? } - -object GenerateCrossrefDataset{ - val log:Logger = LoggerFactory.getLogger(getClass) - val propertyPath ="/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" +object GenerateCrossrefDataset { + val log: Logger = LoggerFactory.getLogger(getClass) + val propertyPath = "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" def main(args: Array[String]): Unit = { - new GenerateCrossrefDataset(propertyPath,args, log).initialize().run() + new GenerateCrossrefDataset(propertyPath, args, log).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 38a3350a0..afb687b37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -284,11 +284,11 @@ object DataciteToOAFTransformation { } def generateRelation( - sourceId: String, - targetId: String, - relClass: String, - collectedFrom: KeyValue, - di: DataInfo + sourceId: String, + targetId: String, + relClass: String, + collectedFrom: KeyValue, + di: DataInfo ): Relation = { val r = new Relation r.setSource(sourceId) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 091d48713..98a8c4c68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -360,10 +360,13 @@ object BioDBToOAF { val rel = new Relation - val provenance = OafMapperUtils.getProvenance(Lists.newArrayList( - collectedFrom, - collectedFromMap("pdb") - ), REL_DATA_INFO) + val provenance = OafMapperUtils.getProvenance( + Lists.newArrayList( + collectedFrom, + collectedFromMap("pdb") + ), + REL_DATA_INFO + ) rel.setProvenance(provenance) From 8af8b2ea27d63141367056da19b458266d79aa5c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 14 Feb 2023 17:20:13 +0100 Subject: [PATCH 3/3] Now Crossref mapping and dhp-aggregation compile --- .../dhp/schema/common/ModelConstants.java | 36 +- .../schema/oaf/utils/CleaningFunctions.java | 29 +- .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/schema/oaf/utils/OafMapperUtils.java | 8 +- .../dhp/common/vocabulary/VocabularyTest.java | 2 +- .../oaf/utils/IdentifierFactoryTest.java | 23 +- .../dhp/crossref/CrossrefUtility.scala | 492 +++++++++++++----- .../DataciteToOAFTransformation.scala | 1 - 8 files changed, 447 insertions(+), 146 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index dc38f218f..f10fda99d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -4,12 +4,14 @@ package eu.dnetlib.dhp.schema.common; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class ModelConstants { private ModelConstants() { } + public static final String DOI = "doi"; public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; @@ -219,7 +221,7 @@ public class ModelConstants { public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier( "alternative title", "alternative title", DNET_DATACITE_TITLE); - private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); + public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); public static final AccessRight OPEN_ACCESS_RIGHT() { @@ -230,6 +232,38 @@ public class ModelConstants { return result; } + public static final AccessRight RESTRICTED_ACCESS_RIGHT() { + final AccessRight result = new AccessRight(); + result.setClassid("RESTRICTED"); + result.setClassname("Restricted"); + result.setSchemeid(ModelConstants.DNET_ACCESS_MODES); + return result; + } + + public static final AccessRight UNKNOWN_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES + ); + } + + public static final AccessRight EMBARGOED_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ACCESS_RIGHT_EMBARGO, + ACCESS_RIGHT_EMBARGO, + DNET_ACCESS_MODES + ); + } + + public static final AccessRight CLOSED_ACCESS_RIGHT() { + return OafMapperUtils.accessRight( + ACCESS_RIGHT_CLOSED, + "Closed Access", + ModelConstants.DNET_ACCESS_MODES + ); + } + private static Qualifier qualifier( final String classid, final String classname, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index c0ef339bd..aaae0fe0a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -6,13 +6,17 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import lombok.val; public class CleaningFunctions { public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)"; + + private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)"; public static final String DOI_PREFIX = "10."; public static final Set PID_BLACKLIST = new HashSet<>(); @@ -58,6 +62,27 @@ public class CleaningFunctions { return pid; } + /** + * This utility was moved from DOIBoost, + * it implements a better cleaning of DOI. + * In case of wrong DOI it raises an illegalArgumentException + * @param input DOI + * @return normalized DOI + */ + private static String normalizeDOI(final String input) { + if (input == null) + throw new IllegalArgumentException("PID value cannot be empty"); + final String replaced = input + .replaceAll(ALL_SPACES_REGEX, "") + .toLowerCase() + .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + if (StringUtils.isEmpty(replaced.trim())) + throw new IllegalArgumentException("PID value normalized return empty string"); + if (!replaced.contains("10.")) + throw new IllegalArgumentException("DOI Must starts with 10."); + return replaced.substring(replaced.indexOf("10.")); + } + public static String normalizePidValue(String pidType, String pidValue) { String value = Optional .ofNullable(pidValue) @@ -67,8 +92,8 @@ public class CleaningFunctions { switch (pidType) { // TODO add cleaning for more PID types as needed - case "doi": - return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + case ModelConstants.DOI: + return normalizeDOI(value.toLowerCase()); } return value; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index fff9ac885..b70250f26 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -495,7 +495,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { private static AccessRight accessRight(String classid, String classname, String scheme) { return OafMapperUtils .accessRight( - classid, classname, scheme, scheme); + classid, classname, scheme); } private static Qualifier qualifier(String classid, String classname, String scheme) { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 723254bab..5c7e237fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -55,19 +55,19 @@ public class OafMapperUtils { return qualifier(UNKNOWN, "Unknown", schemeid); } + + public static AccessRight accessRight( final String classid, final String classname, - final String schemeid, - final String schemename) { - return accessRight(classid, classname, schemeid, schemename, null); + final String schemeid) { + return accessRight(classid, classname, schemeid, null); } public static AccessRight accessRight( final String classid, final String classname, final String schemeid, - final String schemename, final OpenAccessRoute openAccessRoute) { final AccessRight accessRight = new AccessRight(); accessRight.setClassid(classid); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java index 0650dc53b..eb4a092cf 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java @@ -67,7 +67,7 @@ public class VocabularyTest { if (t1 == null) { System.err.println(s1 + " Missing"); } else { - System.out.println("syn=" + s1 + " term = " + t1.getClassid()); + System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname()); System.out .println( diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 08339c3a1..a26e1c83d 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.schema.oaf.utils; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -81,4 +81,23 @@ class IdentifierFactoryTest { assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5)); } + @Test + void testNormalizeDOI() throws Exception { + + final String doi = "10.1042/BCJ20160876"; + + assertEquals(CleaningFunctions.normalizePidValue("doi", doi), doi.toLowerCase()); + final String doi2 = "0.1042/BCJ20160876"; + assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2)); + + final String doi3 = "https://doi.org/0.1042/BCJ20160876"; + assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3)); + + final String doi4 = "https://doi.org/10.1042/BCJ20160876"; + assertEquals(CleaningFunctions.normalizePidValue("doi", doi4), "10.1042/BCJ20160876".toLowerCase()); + + final String doi5 = "https://doi.org/10.1042/ BCJ20160876"; + assertEquals(CleaningFunctions.normalizePidValue("doi", doi5), "10.1042/BCJ20160876".toLowerCase()); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala index 4d81b4858..18299cb87 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala @@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ +import eu.dnetlib.dhp.schema.oaf.utils._ import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString, JValue} +import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} +import java.time.LocalDate +import java.time.format.DateTimeFormatter import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer +import scala.util.matching.Regex case class CrossrefDT(doi: String, json: String, timestamp: Long) {} +case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){} + +case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} + object CrossrefUtility { - val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" - val DOI_PREFIX = "10." val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME) - def normalizeDoi(input: String): String = { - if (input == null) - return null - val replaced = input - .replaceAll("(?:\\n|\\r|\\t|\\s)", "") - .toLowerCase - .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) - if (replaced == null || replaced.trim.isEmpty) - return null - if (replaced.indexOf("10.") < 0) - return null - val ret = replaced.substring(replaced.indexOf("10.")) - if (!ret.startsWith(DOI_PREFIX)) - return null - ret - } + val logger: Logger = LoggerFactory.getLogger(getClass) - def extractDate(dt: String, datePart: List[List[Int]]): String = { - if (StringUtils.isNotBlank(dt)) - return GraphCleaningFunctions.cleanDate(dt) - if (datePart != null && datePart.size == 1) { - val res = datePart.head - if (res.size == 3) { - val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" - if (dp.length == 10) { - return GraphCleaningFunctions.cleanDate(dp) - } - } else if (res.size == 2) { - val dp = f"${res.head}-${res(1)}%02d-01" - return GraphCleaningFunctions.cleanDate(dp) - } else if (res.size == 1) { - return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01") - } - } - null - - } - - private def generateDate( - dt: String, - datePart: List[List[Int]], - classId: String, - schemeId: String - ): StructuredProperty = { - val dp = extractDate(dt, datePart) - if (StringUtils.isNotBlank(dp)) - structuredProperty(dp, classId, classId, schemeId) - else - null - } - - private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = { - val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) - if (term != null) { - val resourceType = - vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname - - resourceType match { - case "publication" => (new Publication, resourceType) - case "dataset" => (new Dataset, resourceType) - case "software" => (new Software, resourceType) - case "otherresearchproduct" => (new OtherResearchProduct, resourceType) - } - } else - null - } def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -103,7 +47,8 @@ object CrossrefUtility { val result = resultWithType._1 val cOBJCategory = resultWithType._2 - mappingResult(result, json, cOBJCategory) + val className = resultWithType._3 + mappingResult(result, json, cOBJCategory, className) if (result == null || result.getId == null) return List() @@ -111,29 +56,144 @@ object CrossrefUtility { (json \ "funder").extractOrElse[List[mappingFunder]](List()) if (funderList.nonEmpty) { - resultList = resultList ::: mappingFunderToRelations( - funderList, - result.getId, - createCrossrefCollectedFrom(), - result.getDataInfo, - result.getLastupdatetimestamp - ) + resultList = resultList ::: mappingFunderToRelations(funderList, result ) } - - result match { - case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) - } - resultList = resultList ::: List(result) resultList } - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { + private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = { + val r = new Relation + r.setSource(sourceId) + r.setTarget(targetId) + r.setRelType(ModelConstants.RESULT_PROJECT) + r.setRelClass(relClass) + r.setSubRelType(ModelConstants.OUTCOME) + r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava) + r + } + + + private def generateSimpleRelationFromAward( + funder: mappingFunder, + nsPrefix: String, + extractField: String => String, + source:Result + ): List[Relation] = { + if (funder.award.isDefined && funder.award.get.nonEmpty) + funder.award.get + .map(extractField) + .filter(a => a != null && a.nonEmpty) + .map(award => { + val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true) + createRelation(targetId, source.getId, ModelConstants.PRODUCES) + }) + else List() + } + + private def extractECAward(award: String): String = { + val awardECRegex: Regex = "[0-9]{4,9}".r + if (awardECRegex.findAllIn(award).hasNext) + return awardECRegex.findAllIn(award).max + null + } + + private def snsfRule(award: String): String = { + val tmp1 = StringUtils.substringAfter(award, "_") + val tmp2 = StringUtils.substringBefore(tmp1, "/") + tmp2 + + } + + private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = { + var relList:List[Relation] = List() + + if (funders != null) + funders.foreach(funder => { + if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + funder.DOI.get match { + case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" | + "10.13039/100010665" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + case "10.13039/501100000781" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result) + case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result) + case "10.13039/501100001602" => + relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result) + case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result) + case "10.13039/501100000038" => + val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100000155" => + val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100000024" => + val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result) + case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result) + case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result) + case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result) + case "10.13039/501100003407" => + relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100006588" | "10.13039/501100004488" => + relList =relList ::: generateSimpleRelationFromAward( + funder, + "irb_hr______", + a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result + ) + case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result) + case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result) + case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result) + case "10.13039/100004440" => + relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case _ => logger.debug("no match for " + funder.DOI.get) + + } + + } else { + funder.name match { + case "European Union’s Horizon 2020 research and innovation program" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "European Union's" => + relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + case "The French National Research Agency (ANR)" | "The French National Research Agency" => + relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + case "CONICYT, Programa de Formación de Capital Humano Avanzado" => + relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result) + case "Wellcome Trust Masters Fellowship" => + relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case _ => logger.debug("no match for " + funder.name) + + } + } + + }) + relList + + } + + + + + + private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID - val doi: String = normalizeDoi((json \ "DOI").extract[String]) + val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String]) result.setPid( List( @@ -176,9 +236,7 @@ object CrossrefUtility { } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val subtitles = for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( - title, - ModelConstants.SUBTITLE_QUALIFIER - ) + title, ModelConstants.SUBTITLE_QUALIFIER) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION @@ -244,21 +302,28 @@ object CrossrefUtility { if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava - ) + subjectList.map(s => + OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null) + ).asJava) } //Mapping Author - val authorList: List[mappingAuthor] = - (json \ "author").extractOrElse[List[mappingAuthor]](List()) + val authorList:List[CrossrefAuthor] = + for { + JObject(author) <- json \ "author" + JField("ORCID", JString(orcid)) <- author + JField("given", JString(givenName)) <- author + JField("family", JString(familyName)) <- author + JField("sequence", JString(sequence)) <- author + } yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0) - val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => - a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") - ) - - result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) - }.asJava) + result.setAuthor(authorList.sortWith((a,b) =>{ + if (a.sequence.equalsIgnoreCase("first")) + true + else if (b.sequence.equalsIgnoreCase("first")) + false + else a.familyName< b.familyName + }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava) // Mapping instance val instance = new Instance() @@ -266,8 +331,8 @@ object CrossrefUtility { JObject(license) <- json \ "license" JField("URL", JString(lic)) <- license JField("content-version", JString(content_version)) <- license - } yield (asField(lic), content_version) - val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + } yield (new License(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl)) if (l.nonEmpty) { if (l exists (d => d._2.equals("vor"))) { for (d <- l) { @@ -290,66 +355,225 @@ object CrossrefUtility { OafMapperUtils.qualifier( "0001", "peerReviewed", - ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS ) ) } - instance.setAccessright( - decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) - ) + if (instance.getLicense!= null) + instance.setAccessright( + decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance) + ) instance.setInstancetype( OafMapperUtils.qualifier( - cobjCategory.substring(0, 4), - cobjCategory.substring(5), - ModelConstants.DNET_PUBLICATION_RESOURCE, + cobjCategory, + className, ModelConstants.DNET_PUBLICATION_RESOURCE ) ) result.setResourcetype( OafMapperUtils.qualifier( - cobjCategory.substring(0, 4), - cobjCategory.substring(5), - ModelConstants.DNET_PUBLICATION_RESOURCE, + cobjCategory, + className, ModelConstants.DNET_PUBLICATION_RESOURCE ) ) - instance.setCollectedfrom(createCrossrefCollectedFrom()) + instance.setCollectedfrom(CROSSREF_COLLECTED_FROM) if (StringUtils.isNotBlank(issuedDate)) { - instance.setDateofacceptance(asField(issuedDate)) + instance.setDateofacceptance(issuedDate) } else { - instance.setDateofacceptance(asField(createdDate.getValue)) + instance.setDateofacceptance(createdDate.getValue) } val s: List[String] = List("https://doi.org/" + doi) - // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct - // if (links.nonEmpty) { - // instance.setUrl(links.asJava) - // } if (s.nonEmpty) { instance.setUrl(s.asJava) } + val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct + //Mapping book + if (className.toLowerCase.contains("book")) { + val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn + if (ISBN.nonEmpty && containerTitles.nonEmpty) { + val source = s"${containerTitles.head} ISBN: ${ISBN.head}" + if (result.getSource != null) { + val l: List[String] = result.getSource.asScala.toList ::: List(source) + result.setSource(l.asJava) + } else + result.setSource(List(source).asJava) + } + } else { + // Mapping Journal + val issnInfos = for { + JObject(issn_type) <- json \ "issn-type" + JField("type", JString(tp)) <- issn_type + JField("value", JString(vl)) <- issn_type + } yield Tuple2(tp, vl) + + val volume = (json \ "volume").extractOrElse[String](null) + if (containerTitles.nonEmpty) { + val journal = new Journal + journal.setName(containerTitles.head) + if (issnInfos.nonEmpty) { + + issnInfos.foreach(tp => { + tp._1 match { + case "electronic" => journal.setIssnOnline(tp._2) + case "print" => journal.setIssnPrinted(tp._2) + } + }) + } + journal.setVol(volume) + val page = (json \ "page").extractOrElse[String](null) + if (page != null) { + val pp = page.split("-") + if (pp.nonEmpty) + journal.setSp(pp.head) + if (pp.size > 1) + journal.setEp(pp(1)) + } + result.setJournal(journal) + } + } + result.setInstance(List(instance).asJava) - - //IMPORTANT - //The old method result.setId(generateIdentifier(result, doi)) - //is replaced using IdentifierFactory, but the old identifier - //is preserved among the originalId(s) - val oldId = generateIdentifier(result, doi) - result.setId(oldId) - - val newId = IdentifierFactory.createDOIBoostIdentifier(result) - if (!oldId.equalsIgnoreCase(newId)) { - result.getOriginalId.add(oldId) - } - result.setId(newId) - - if (result.getId == null) + result.setId("ID") + result.setId(IdentifierFactory.createIdentifier(result, true)) + if (result.getId == null || "ID".equalsIgnoreCase(result.getId)) null else result } + def decideAccessRight(license: String, date: String): AccessRight = { + if (license == null || license.isEmpty) { + //Default value Unknown + return ModelConstants.UNKNOWN_ACCESS_RIGHT(); + } + //CC licenses + if ( + license.startsWith("cc") || + license.startsWith("http://creativecommons.org/licenses") || + license.startsWith("https://creativecommons.org/licenses") || + + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") + ) { + + val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + + //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) + if ( + license.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) { + val now = java.time.LocalDate.now + + try { + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return ModelConstants.EMBARGOED_ACCESS_RIGHT() + } + } catch { + case _: Exception => { + try { + val pub_date = + LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = OPEN_ACCESS_RIGHT() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return ModelConstants.EMBARGOED_ACCESS_RIGHT() + } + } catch { + case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT() + } + } + + } + + } + + ModelConstants.CLOSED_ACCESS_RIGHT() + } + + + private def extractDate(dt: String, datePart: List[List[Int]]): String = { + if (StringUtils.isNotBlank(dt)) + return GraphCleaningFunctions.cleanDate(dt) + if (datePart != null && datePart.size == 1) { + val res = datePart.head + if (res.size == 3) { + val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" + if (dp.length == 10) { + return GraphCleaningFunctions.cleanDate(dp) + } + } else if (res.size == 2) { + val dp = f"${res.head}-${res(1)}%02d-01" + return GraphCleaningFunctions.cleanDate(dp) + } else if (res.size == 1) { + return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01") + } + } + null + } + + private def generateDate( + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { + val dp = extractDate(dt, datePart) + if (StringUtils.isNotBlank(dp)) + structuredProperty(dp, classId, classId, schemeId) + else + null + } + + private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = { + val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) + if (term != null) { + val resourceType = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname + + resourceType match { + case "publication" => (new Publication, resourceType, term.getClassname) + case "dataset" => (new Dataset, resourceType, term.getClassname) + case "software" => (new Software, resourceType, term.getClassname) + case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname) + } + } else + null + } + + private def generateAuthor(ca: CrossrefAuthor): Author = { + val a = new Author + a.setName(ca.givenName) + a.setSurname(ca.familyName) + a.setFullname(s"${ca.familyName}, ${ca.givenName}") + a.setRank(ca.rank + 1) + if (StringUtils.isNotBlank(ca.ORCID)) + a.setPid( + List( + OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null) + ).asJava + ) + a + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index afb687b37..2696b5252 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -553,7 +553,6 @@ object DataciteToOAFTransformation { OafMapperUtils.accessRight( ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, - ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES )