From 125657ed4c390a1779595377ff44fb6171cd8f07 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Mon, 13 Feb 2023 12:40:14 +0100
Subject: [PATCH 1/3] code formatting; bump project version to 2.0.0-SNAPSHOT

---
 dhp-build/dhp-code-style/pom.xml | 3 +-
 .../dhp/schema/common/ModelConstants.java | 8 +-
 .../dhp/schema/oaf/common/ModelSupport.java | 3 +-
 .../dhp/schema/oaf/utils/MergeUtils.java | 32 +++---
 .../dhp/schema/oaf/utils/OafMapperUtils.java | 3 +-
 .../dhp/common/vocabulary/VocabularyTest.java | 107 +++++++++---------
 .../dhp/crossref/CrossrefUtility.scala | 68 ++++++-----
 .../crossref/GenerateCrossrefDataset.scala | 18 +--
 .../DataciteToOAFTransformation.scala | 10 +-
 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 11 +-
 dhp-workflows/dhp-enrichment/dtree | 18 +++
 11 files changed, 148 insertions(+), 133 deletions(-)
 create mode 100644 dhp-workflows/dhp-enrichment/dtree

diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index 5a86efe17..c0e12a63f 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -5,7 +5,8 @@
 eu.dnetlib.dhp
 dhp-code-style
- 1.2.5-SNAPSHOT
+
+ 2.0.0-SNAPSHOT
 jar

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index d2ef9fa7b..dc38f218f 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -7,7 +7,8 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
 public class ModelConstants {
- private ModelConstants() {}
+ private ModelConstants() {
+ }
 public static final String ORCID = "orcid";
 public static final String ORCID_PENDING = "orcid_pending";
@@ -80,7 +81,6 @@ public class ModelConstants {
 public static final String PROVENANCE_DEDUP = "sysimport:dedup";
 public static final String PROVENANCE_ENRICH = "sysimport:enrich";
-
 public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier(
 SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS);
@@ -127,8 +127,6 @@ public class ModelConstants {
 public static final String IS_REQUIRED_BY = "IsRequiredBy";
 public static final String REQUIRES = "Requires";
-
-
 public static final String CITATION = "citation"; // subreltype
 public static final String CITES = "Cites";
 public static final String IS_CITED_BY = "IsCitedBy";
@@ -219,7 +217,7 @@ public class ModelConstants {
 "main title", "main title", DNET_DATACITE_TITLE);
 public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
- "alternative title", "alternative title", DNET_DATACITE_TITLE);
+ "alternative title", "alternative title", DNET_DATACITE_TITLE);
 private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
index 3ea391bd4..3ee3ed5a1 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
@@ -209,7 +209,8 @@ public class ModelSupport {
 return idPrefixMap.get(clazz);
 }
- public static Boolean sameClass(X left, Y right, Class superClazz) {
+ public static Boolean sameClass(X left, Y right,
+ Class superClazz) {
 return isSubClass(left, superClazz) && isSubClass(right, superClazz);
 }
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
index cc6e10d81..ae275681d 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@@ -31,10 +31,10 @@ public class MergeUtils {
 return mergeRelation(left, right);
 } else {
 throw new RuntimeException(
- String
- .format(
- "MERGE_FROM_AND_GET incompatible types: %s, %s",
- left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
+ String
+ .format(
+ "MERGE_FROM_AND_GET incompatible types: %s, %s",
+ left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
 }
 }
@@ -53,10 +53,10 @@ public class MergeUtils {
 return mergeProject(left, right);
 } else {
 throw new RuntimeException(
- String
- .format(
- "MERGE_FROM_AND_GET incompatible types: %s, %s",
- left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
+ String
+ .format(
+ "MERGE_FROM_AND_GET incompatible types: %s, %s",
+ left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
 }
 }
@@ -110,8 +110,8 @@ public class MergeUtils {
 mergedEntity.setLastupdatetimestamp(enrich.getLastupdatetimestamp());
 } else if (mergedEntity.getLastupdatetimestamp() != null && enrich.getLastupdatetimestamp() != null) {
 mergedEntity
- .setLastupdatetimestamp(
- Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
+ .setLastupdatetimestamp(
+ Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
 }
 mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid()));
@@ -138,7 +138,7 @@ public class MergeUtils {
 checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal");
 checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal");
 checkArgument(
- Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
+ Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
 checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");
 original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance()));
@@ -148,10 +148,10 @@ public class MergeUtils {
 original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate()));
 } catch (ParseException e) {
 throw new IllegalArgumentException(String
- .format(
- "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(),
- original.getTarget(),
- original.getValidationDate()));
+ .format(
+ "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(),
+ original.getTarget(),
+ original.getValidationDate()));
 }
 return (T) original;
@@ -370,7 +370,7 @@ public class MergeUtils {
 private static T mergePublication(T original, T enrich) {
- //add publication specific fields.
+ // add publication specific fields.
 mergeEntityDataInfo(original, enrich);

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index ff16cf4d8..723254bab 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -363,7 +363,8 @@ public class OafMapperUtils {
 final Entity entity,
 final String validationDate) {
- final List provenance = getProvenance(entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo()));
+ final List provenance = getProvenance(
+ entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo()));
 return getRelation(
 source, target, relType, subRelType, relClass, provenance, validationDate, null);
 }

diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
index 6529d43da..0650dc53b 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
@@ -1,8 +1,13 @@
+
 package eu.dnetlib.dhp.common.vocabulary;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import static org.mockito.Mockito.lenient;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
@@ -12,73 +17,63 @@ import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-
-import static org.mockito.Mockito.lenient;
-
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 @ExtendWith(MockitoExtension.class)
 public class VocabularyTest {
+ @Mock
+ protected ISLookUpService isLookUpService;
- @Mock
- protected ISLookUpService isLookUpService;
+ protected VocabularyGroup vocabularies;
- protected VocabularyGroup vocabularies;
+ @BeforeEach
+ public void setUpVocabulary() throws ISLookUpException, IOException {
- @BeforeEach
- public void setUpVocabulary() throws ISLookUpException, IOException {
+ lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
- lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
+ lenient()
+ .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
+ .thenReturn(synonyms());
+ vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
+ }
- lenient()
- .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
- .thenReturn(synonyms());
- vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
- }
+ private static List vocs() throws IOException {
+ return IOUtils
+ .readLines(
+ Objects
+ .requireNonNull(
+ VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt")));
+ }
- private static List vocs() throws IOException {
- return IOUtils
- .readLines(
- Objects
- .requireNonNull(
- VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt")));
- }
+ private static List synonyms() throws IOException {
+ return IOUtils
+ .readLines(
+ Objects
+ .requireNonNull(
+ VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt")));
+ }
- private static List synonyms() throws IOException {
- return IOUtils
- .readLines(
- Objects
- .requireNonNull(
- VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt")));
- }
+ @Test
+ void testVocabularyMatch() throws Exception {
+ final String s = IOUtils.toString(this.getClass().getResourceAsStream("terms"));
+ for (String s1 : s.split("\n")) {
- @Test
- void testVocabularyMatch () throws Exception{
- final String s= IOUtils.toString(this.getClass().getResourceAsStream("terms"));
+ final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1);
- for (String s1 : s.split("\n")) {
+ if (t1 == null) {
+ System.err.println(s1 + " Missing");
+ } else {
+ System.out.println("syn=" + s1 + " term = " + t1.getClassid());
- final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1);
+ System.out
+ .println(
+ vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname());
+ }
+ }
- if (t1 == null) {
- System.err.println(s1+ " Missing");
- }
- else {
- System.out.println("syn=" + s1 + " term = " + t1.getClassid());
-
-
- System.out.println(vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname());
- }
- }
-
-
-
-
-
- }
+ }
 }

diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
index 28ea64c9b..4d81b4858 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
@@ -13,8 +13,8 @@ import org.json4s.jackson.JsonMethods.parse
 import scala.collection.JavaConverters._
-
 case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
+
 object CrossrefUtility {
 val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
 val DOI_PREFIX = "10."
@@ -37,7 +37,6 @@ object CrossrefUtility {
 ret
 }
-
 def extractDate(dt: String, datePart: List[List[Int]]): String = {
 if (StringUtils.isNotBlank(dt))
 return GraphCleaningFunctions.cleanDate(dt)
@@ -60,36 +59,35 @@ object CrossrefUtility {
 }
 private def generateDate(
- dt: String,
- datePart: List[List[Int]],
- classId: String,
- schemeId: String
- ): StructuredProperty = {
+ dt: String,
+ datePart: List[List[Int]],
+ classId: String,
+ schemeId: String
+ ): StructuredProperty = {
 val dp = extractDate(dt, datePart)
 if (StringUtils.isNotBlank(dp))
- structuredProperty(dp, classId, classId,schemeId)
+ structuredProperty(dp, classId, classId, schemeId)
 else null
 }
-
- private def generateItemFromType(objectType: String, vocabularies:VocabularyGroup): (Result, String) = {
+ private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = {
 val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
 if (term != null) {
- val resourceType = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
+ val resourceType =
+ vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
 resourceType match {
- case "publication" =>(new Publication, resourceType)
- case "dataset" =>(new Dataset, resourceType)
- case "software" => (new Software, resourceType)
- case "otherresearchproduct" =>(new OtherResearchProduct, resourceType)
+ case "publication" => (new Publication, resourceType)
+ case "dataset" => (new Dataset, resourceType)
+ case "software" => (new Software, resourceType)
+ case "otherresearchproduct" => (new OtherResearchProduct, resourceType)
 }
 } else null
 }
-
- def convert(input: String, vocabularies:VocabularyGroup): List[Oaf] = {
+ def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
 implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 lazy val json: json4s.JValue = parse(input)
@@ -124,14 +122,13 @@ object CrossrefUtility {
 result match {
 case publication: Publication => convertPublication(publication, json, cOBJCategory)
- case dataset: Dataset => convertDataset(dataset)
+ case dataset: Dataset => convertDataset(dataset)
 }
 resultList = resultList ::: List(result)
 resultList
 }
-
 def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
 implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@@ -140,8 +137,9 @@ object CrossrefUtility {
 result.setPid(
 List(
- structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES)
- ).asJava)
+ structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES)
+ ).asJava
+ )
 //MAPPING Crossref DOI into OriginalId
 //and Other Original Identifier of dataset like clinical-trial-number
 val alternativeIds: List[String] = for (JString(ids) <- json \ "alternative-id") yield ids
 val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)
-
 result.setOriginalId(tmp.filter(id => id != null).asJava)
 // Add DataInfo
- result.setDataInfo(dataInfo(false, false,0.9F,null, false,ModelConstants.REPOSITORY_PROVENANCE_ACTIONS))
+ result.setDataInfo(dataInfo(false, false, 0.9f, null, false, ModelConstants.REPOSITORY_PROVENANCE_ACTIONS))
 result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
 result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])
@@ -167,23 +164,26 @@ object CrossrefUtility {
 // TITLE
 val mainTitles =
- for {JString(title) <- json \ "title" if title.nonEmpty}
- yield
- structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER)
+ for { JString(title) <- json \ "title" if title.nonEmpty } yield structuredProperty(
+ title,
+ ModelConstants.MAIN_TITLE_QUALIFIER
+ )
 val originalTitles = for {
 JString(title) <- json \ "original-title" if title.nonEmpty
 } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
 val shortTitles = for {
 JString(title) <- json \ "short-title" if title.nonEmpty
- } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
+ } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
 val subtitles =
- for {JString(title) <- json \ "subtitle" if title.nonEmpty}
- yield structuredProperty(title, ModelConstants.SUBTITLE_QUALIFIER)
+ for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
+ title,
+ ModelConstants.SUBTITLE_QUALIFIER
+ )
 result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
 // DESCRIPTION
 val descriptionList =
- for {JString(description) <- json \ "abstract"} yield description
+ for { JString(description) <- json \ "abstract" } yield description
 result.setDescription(descriptionList.asJava)
 // Source
@@ -242,11 +242,9 @@ object CrossrefUtility {
 //Mapping Subject
 val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())
-
-
 if (subjectList.nonEmpty) {
 result.setSubject(
- subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
+ subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
 )
 }
@@ -265,8 +263,8 @@ object CrossrefUtility {
 // Mapping instance
 val instance = new Instance()
 val license = for {
- JObject(license) <- json \ "license"
- JField("URL", JString(lic)) <- license
+ JObject(license) <- json \ "license"
+ JField("URL", JString(lic)) <- license
 JField("content-version", JString(content_version)) <- license
 } yield (asField(lic), content_version)
 val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))

diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala
index fd4bcd37d..0d45d1c83 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala
@@ -3,20 +3,20 @@ package eu.dnetlib.dhp.crossref
 import eu.dnetlib.dhp.application.AbstractScalaApplication
 import org.slf4j.{Logger, LoggerFactory}
-class GenerateCrossrefDataset (propertyPath: String, args: Array[String], log: Logger)
- extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+class GenerateCrossrefDataset(propertyPath: String, args: Array[String], log: Logger)
+ extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+
 /** Here all the spark applications runs this method
- * where the whole logic of the spark node is defined
- */
+ * where the whole logic of the spark node is defined
+ */
 override def run(): Unit = ???
 }
-
-object GenerateCrossrefDataset{
- val log:Logger = LoggerFactory.getLogger(getClass)
- val propertyPath ="/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"
+object GenerateCrossrefDataset {
+ val log: Logger = LoggerFactory.getLogger(getClass)
+ val propertyPath = "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"
 def main(args: Array[String]): Unit = {
- new GenerateCrossrefDataset(propertyPath,args, log).initialize().run()
+ new GenerateCrossrefDataset(propertyPath, args, log).initialize().run()
 }
 }

diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
index 38a3350a0..afb687b37 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@@ -284,11 +284,11 @@ object DataciteToOAFTransformation {
 }
 def generateRelation(
- sourceId: String,
- targetId: String,
- relClass: String,
- collectedFrom: KeyValue,
- di: DataInfo
+ sourceId: String,
+ targetId: String,
+ relClass: String,
+ collectedFrom: KeyValue,
+ di: DataInfo
 ): Relation = {
 val r = new Relation
 r.setSource(sourceId)

diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
index 091d48713..98a8c4c68 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@@ -360,10 +360,13 @@ object BioDBToOAF {
 val rel = new Relation
- val provenance = OafMapperUtils.getProvenance(Lists.newArrayList(
- collectedFrom,
- collectedFromMap("pdb")
- ), REL_DATA_INFO)
+ val provenance = OafMapperUtils.getProvenance(
+ Lists.newArrayList(
+ collectedFrom,
+ collectedFromMap("pdb")
+ ),
+ REL_DATA_INFO
+ )
 rel.setProvenance(provenance)

diff --git a/dhp-workflows/dhp-enrichment/dtree b/dhp-workflows/dhp-enrichment/dtree
new file mode 100644
index 000000000..868ae7918
--- /dev/null
+++ b/dhp-workflows/dhp-enrichment/dtree
@@ -0,0 +1,18 @@
+[INFO] Scanning for projects...
+[INFO]
+[INFO] -------------------< eu.dnetlib.dhp:dhp-enrichment >--------------------
+[INFO] Building dhp-enrichment 2.0.0-SNAPSHOT
+[INFO] --------------------------------[ jar ]---------------------------------
+[INFO] ------------------------------------------------------------------------
+[INFO] BUILD FAILURE
+[INFO] ------------------------------------------------------------------------
+[INFO] Total time: 1.737 s
+[INFO] Finished at: 2023-02-10T17:53:31+01:00
+[INFO] ------------------------------------------------------------------------
+[ERROR] Failed to execute goal on project dhp-enrichment: Could not resolve dependencies for project eu.dnetlib.dhp:dhp-enrichment:jar:2.0.0-SNAPSHOT: Failed to collect dependencies at eu.dnetlib.dhp:dhp-common:jar:2.0.0-SNAPSHOT: Failed to read artifact descriptor for eu.dnetlib.dhp:dhp-common:jar:2.0.0-SNAPSHOT: Failure to find eu.dnetlib.dhp:dhp:pom:2.0.0-SNAPSHOT in https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-snapshot/ was cached in the local repository, resolution will not be reattempted until the update interval of dnet45-bootstrap-snapshot has elapsed or updates are forced -> [Help 1]
+[ERROR]
+[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
+[ERROR] Re-run Maven using the -X switch to enable full debug logging.
+[ERROR]
+[ERROR] For more information about the errors and possible solutions, please read the following articles:
+[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/DependencyResolutionException

From 624c62f62dcba76cc46e882bda486513571e21f8 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 15 Feb 2023 16:20:24 +0100
Subject: [PATCH 2/3] [enrichment] changed to make it compile against the new model

---
 .../dhp/schema/common/ModelConstants.java | 18 +-
 .../schema/oaf/utils/CleaningFunctions.java | 2 +-
 .../dhp/schema/oaf/utils/OafMapperUtils.java | 4 +-
 .../dhp/common/vocabulary/VocabularyTest.java | 2 +-
 .../actionmanager/promote/MergeAndGet.java | 13 +-
 .../PromoteActionPayloadForGraphTableJob.java | 3 +-
 .../promote/MergeAndGetTest.java | 4 +-
 ...moteActionPayloadForGraphTableJobTest.java | 3 +-
 .../dnetlib/dhp/actionmanager/Constants.java | 46 ++--
 .../PrepareBipFinder.java | 58 +----
 .../PrepareFOSSparkJob.java | 4 +-
 .../PrepareSDGSparkJob.java | 4 +-
 .../CreateActionSetSparkJob.java | 16 +-
 .../project/SparkAtomicActionJob.java | 6 +-
 .../ror/GenerateRorActionSetJob.java | 2 +-
 .../usagestats/SparkAtomicActionUsageJob.java | 2 +-
 .../GenerateNativeStoreSparkJob.java | 2 +-
 .../dhp/crossref/CrossrefUtility.scala | 243 +++++++++++-------
 .../CreateOpenCitationsASTest.java | 4 +-
 .../eu/dnetlib/dhp/PropagationConstant.java | 75 ++----
 .../dhp/bulktag/community/ResultTagger.java | 50 ++--
 .../bulktag/community/TaggingConstants.java | 2 +-
 .../dhp/bulktag/eosc/SparkEoscBulkTag.java | 9 +-
 .../dhp/bulktag/eosc/SparkEoscTag.java | 4 +-
 .../PrepareDatasourceCountryAssociation.java | 3 +-
 .../SparkCountryPropagationJob.java | 2 +
 .../SparkOrcidToResultFromSemRelJob.java | 27 +-
 ...kResultToCommunityFromOrganizationJob.java | 18 +-
 ...parkResultToCommunityThroughSemRelJob.java | 17 +-
 .../PrepareInfo.java | 3 +-
 .../StepActions.java | 12 +-
 .../resolution/ResolveEntitiesTest.scala | 8 +-
 .../sx/graph/scholix/ScholixGraphTest.scala | 5 +-
 33 files changed, 333 insertions(+), 338 deletions(-)
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index f10fda99d..92400a1ee 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -241,27 +241,27 @@ public class ModelConstants {
 }
 public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
- return OafMapperUtils.accessRight(
+ return OafMapperUtils
+ .accessRight(
 ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
- ModelConstants.DNET_ACCESS_MODES
- );
+ ModelConstants.DNET_ACCESS_MODES);
 }
 public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
- return OafMapperUtils.accessRight(
+ return OafMapperUtils
+ .accessRight(
 ACCESS_RIGHT_EMBARGO,
 ACCESS_RIGHT_EMBARGO,
- DNET_ACCESS_MODES
- );
+ DNET_ACCESS_MODES);
 }
 public static final AccessRight CLOSED_ACCESS_RIGHT() {
- return OafMapperUtils.accessRight(
+ return OafMapperUtils
+ .accessRight(
 ACCESS_RIGHT_CLOSED,
 "Closed Access",
- ModelConstants.DNET_ACCESS_MODES
- );
+ ModelConstants.DNET_ACCESS_MODES);
 }
 private static Qualifier qualifier(

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
index aaae0fe0a..352cdad47 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
@@ -6,9 +6,9 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.lang3.StringUtils;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import lombok.val;

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index 5c7e237fb..8a0661bb6 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -55,13 +55,11 @@ public class OafMapperUtils {
 return qualifier(UNKNOWN, "Unknown", schemeid);
 }
-
-
 public static AccessRight accessRight(
 final String classid,
 final String classname,
 final String schemeid) {
- return accessRight(classid, classname, schemeid, null);
+ return accessRight(classid, classname, schemeid, null);
 }
 public static AccessRight accessRight(

diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
index eb4a092cf..958806837 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
@@ -67,7 +67,7 @@ public class VocabularyTest {
 if (t1 == null) {
 System.err.println(s1 + " Missing");
 } else {
- System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
+ System.out.println("syn=" + s1 + " term = " + t1.getClassid() + " " + t1.getClassname());
 System.out
 .println(

diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
index 0338a7aae..f1afdad22 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java
@@ -7,7 +7,6 @@ import java.util.function.BiFunction;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
-
 import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 /** OAF model merging support. */
@@ -56,19 +55,19 @@ public class MergeAndGet {
 Entity yE = (Entity) y;
 if (xE.getClass().equals(yE.getClass())
- && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
+ && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
 return x;
 } else if (xE.getClass().equals(yE.getClass())
- && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
+ && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
 return (G) y;
 } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
 return x;
 } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
 throw new RuntimeException(
- String
- .format(
- "SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
- x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
+ String
+ .format(
+ "SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
+ x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
 }
 }

diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
index 6f76b6a8d..f872d1339 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java
@@ -8,7 +8,6 @@ import java.util.Optional;
 import java.util.function.BiFunction;
 import java.util.function.Function;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
@@ -26,8 +25,8 @@ import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.common.HdfsSupport;
-
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 /** Applies a given action payload file to graph table of compatible type. */
 public class PromoteActionPayloadForGraphTableJob {

diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
index fde308c42..9c12d7f3f 100644
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java
@@ -8,12 +8,12 @@ import static org.mockito.Mockito.*;
 import java.util.function.BiFunction;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 public class MergeAndGetTest {
@@ -97,7 +97,7 @@ public class MergeAndGetTest {
 // then
 Oaf x = fn.get().apply(a, b);
 assertTrue(Relation.class.isAssignableFrom(x.getClass()));
- //verify(a).mergeFrom(b);
+ // verify(a).mergeFrom(b);
 a = MergeUtils.merge(verify(a), b);
 assertEquals(a, x);
 }

diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
index 10a276428..c3a32fb46 100644
--- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
+++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java
@@ -14,7 +14,6 @@ import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -28,8 +27,8 @@ import org.junit.jupiter.params.provider.MethodSource;
 import com.fasterxml.jackson.databind.ObjectMapper;
-
 import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 public class PromoteActionPayloadForGraphTableJobTest {
 private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
index cdba4ce09..b57a60646 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java
@@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -14,6 +12,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -43,28 +43,28 @@ public class Constants {
 public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 public static final EntityDataInfo SciNoBo_DATA_INFO = OafMapperUtils
- .dataInfo(
- false,
- false,
- 0.8f, //TODO check
- "SciNoBo",
- true,
- OafMapperUtils
- .qualifier(
- ModelConstants.PROVENANCE_ENRICH,
- null,
- ModelConstants.DNET_PROVENANCE_ACTIONS));
+ .dataInfo(
+ false,
+ false,
+ 0.8f, // TODO check
+ "SciNoBo",
+ true,
+ OafMapperUtils
+ .qualifier(
+ ModelConstants.PROVENANCE_ENRICH,
+ null,
+ ModelConstants.DNET_PROVENANCE_ACTIONS));
 public static final DataInfo Bip_DATA_INFO3 = OafMapperUtils
- .dataInfo(
- 0.8f,
- UPDATE_DATA_INFO_TYPE,
- false,
- OafMapperUtils
- .qualifier(
- UPDATE_MEASURE_BIP_CLASS_ID,
- UPDATE_CLASS_NAME,
- ModelConstants.DNET_PROVENANCE_ACTIONS));
+ .dataInfo(
+ 0.8f,
+ UPDATE_DATA_INFO_TYPE,
+ false,
+ OafMapperUtils
+ .qualifier(
+ UPDATE_MEASURE_BIP_CLASS_ID,
+ UPDATE_CLASS_NAME,
+ ModelConstants.DNET_PROVENANCE_ACTIONS));
 private Constants() {
 }
@@ -101,7 +101,7 @@ public class Constants {
 .setDataInfo(
 OafMapperUtils
 .dataInfo(
- 0.0f, //TODO check
+ 0.0f, // TODO check
 UPDATE_DATA_INFO_TYPE,
 true,
 OafMapperUtils

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java
index 334be5b49..c4e52ff50 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java
@@ -11,8 +11,6 @@ import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -31,8 +29,10 @@ import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareBipFinder implements Serializable {
@@ -100,38 +100,16 @@ public class PrepareBipFinder implements Serializable {
 Instance inst = new Instance();
 /*
- inst
- .setPid(
- Arrays
- .asList(
- OafMapperUtils
- .structuredProperty(
- cleanedPid,
- OafMapperUtils
- .qualifier(
- PidType.doi.toString(), DOI_CLASSNAME,
- ModelConstants.DNET_PID_TYPES,
- ModelConstants.DNET_PID_TYPES),
- null)));
-
+ * inst .setPid( Arrays .asList( OafMapperUtils .structuredProperty( cleanedPid, OafMapperUtils
+ * .qualifier( PidType.doi.toString(), DOI_CLASSNAME, ModelConstants.DNET_PID_TYPES,
+ * ModelConstants.DNET_PID_TYPES), null)));
 */
 r.setInstance(Arrays.asList(inst));
 /*
- r
- .setDataInfo(
- OafMapperUtils
- .dataInfo(
- false, null, true,
- false,
- OafMapperUtils
- .qualifier(
- ModelConstants.PROVENANCE_ENRICH,
- null,
- ModelConstants.DNET_PROVENANCE_ACTIONS,
- ModelConstants.DNET_PROVENANCE_ACTIONS),
- null));
-
+ * r .setDataInfo( OafMapperUtils .dataInfo( false, null, true, false, OafMapperUtils .qualifier(
+ * ModelConstants.PROVENANCE_ENRICH, null, ModelConstants.DNET_PROVENANCE_ACTIONS,
+ * ModelConstants.DNET_PROVENANCE_ACTIONS), null));
 */
 return r;
 }, Encoders.bean(Result.class))
@@ -158,22 +136,10 @@ public class PrepareBipFinder implements Serializable {
 u.setValue(u.getValue());
 u.setKey(u.getKey());
 /*
- kv
- .setDataInfo(
- OafMapperUtils
- .dataInfo(
- false,
- UPDATE_DATA_INFO_TYPE,
- true,
- false,
- OafMapperUtils
- .qualifier(
- UPDATE_MEASURE_BIP_CLASS_ID,
- UPDATE_CLASS_NAME,
- ModelConstants.DNET_PROVENANCE_ACTIONS,
- ModelConstants.DNET_PROVENANCE_ACTIONS),
- ""));
-
+ * kv .setDataInfo( OafMapperUtils .dataInfo( false, UPDATE_DATA_INFO_TYPE, true, false,
+ * OafMapperUtils .qualifier( UPDATE_MEASURE_BIP_CLASS_ID, UPDATE_CLASS_NAME,
+ * ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
+ * ""));
 */
 return u;
 })

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
index e99df1b3b..9544b31cf 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java
@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.*;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareFOSSparkJob implements Serializable {

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java
index 944209c60..bbae4346f 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java
@@ -8,8 +8,6 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
-import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
@@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class PrepareSDGSparkJob implements Serializable {

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
index 78d3b671a..11594f83a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java
@@ -7,8 +7,6 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.util.*;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
@@ -31,6 +29,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
 import scala.Tuple2;
 public class CreateActionSetSparkJob implements Serializable {
@@ -47,17 +47,20 @@ public class CreateActionSetSparkJob implements Serializable {
 COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
 COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);
- DATA_INFO = OafMapperUtils.dataInfo(
+ DATA_INFO = OafMapperUtils
+ .dataInfo(
 TRUST,
 null,
 false,
- OafMapperUtils.qualifier(
+ OafMapperUtils
+ .qualifier(
 OPENCITATIONS_CLASSID,
 OPENCITATIONS_CLASSNAME,
 ModelConstants.DNET_PROVENANCE_ACTIONS));
 }
- private static final List PROVENANCE = Arrays.asList(
+ private static final List PROVENANCE = Arrays
+ .asList(
 OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));
 private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
@@ -144,7 +147,8 @@ public class CreateActionSetSparkJob {
 }
 private static String asOpenAireId(String value) {
- return IdentifierFactory.idFromPid(
+ return IdentifierFactory
+ .idFromPid(
 "50", PidType.doi.toString(),
 CleaningFunctions.normalizePidValue(PidType.doi.toString(), value),
 true);

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
index 0ac31cfdd..02da901a6 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
@@ -7,8 +7,6 @@ import java.util.Arrays;
 import java.util.Objects;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -29,10 +27,12 @@ import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.H2020Classification;
 import eu.dnetlib.dhp.schema.oaf.H2020Programme;
-import eu.dnetlib.dhp.schema.oaf.Entity;
 import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
index 37b87607e..8def58740 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
@@ -20,7 +20,6 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.oaf.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -43,6 +42,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
index bc9859154..de328ac49 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java
@@ -121,7 +121,7 @@ public class SparkAtomicActionUsageJob implements Serializable {
 private static List getMeasure(Long downloads, Long views) {
 DataInfo dataInfo = OafMapperUtils
 .dataInfo(
- 0.0f, //TODO check
+ 0.0f, // TODO check
 UPDATE_DATA_INFO_TYPE,
 false,
 OafMapperUtils

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
index 164cf99b9..caaa67ff3 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@@ -11,7 +11,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Objects;
 import java.util.Optional;
-import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.io.IntWritable;
@@ -33,6 +32,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
 import eu.dnetlib.dhp.schema.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
 import scala.Tuple2;
 public class GenerateNativeStoreSparkJob {

diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
index 18299cb87..7c7c6df29 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
@@ -21,7 +21,7 @@ import scala.util.matching.Regex
 case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
-case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
+case class CrossrefAuthor(givenName: String, familyName: String, ORCID: String, sequence: String, rank: Int) {}
 case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
@@ -30,7 +30,6 @@ object CrossrefUtility {
 val logger: Logger = LoggerFactory.getLogger(getClass)
-
 def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
 implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 lazy val json: json4s.JValue = parse(input)
@@ -56,7 +55,7 @@ object CrossrefUtility {
 (json \ "funder").extractOrElse[List[mappingFunder]](List())
 if (funderList.nonEmpty) {
- resultList = resultList ::: mappingFunderToRelations(funderList, result )
+ resultList = resultList ::: mappingFunderToRelations(funderList, result)
 }
 resultList = resultList ::: List(result)
 resultList
@@ -73,19 +72,18 @@ object CrossrefUtility {
 r
 }
-
 private def generateSimpleRelationFromAward(
- funder: mappingFunder,
- nsPrefix: String,
- extractField: String => String,
- source:Result
- ): List[Relation] = {
+ funder: mappingFunder,
+ nsPrefix: String,
+ extractField: String => String,
+ source: Result
+ ): List[Relation] = {
 if (funder.award.isDefined && funder.award.get.nonEmpty)
 funder.award.get
 .map(extractField)
 .filter(a => a != null && a.nonEmpty)
 .map(award => {
- val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
+ val targetId = IdentifierFactory.createOpenaireId("project", s"$nsPrefix::$award", true)
 createRelation(targetId, source.getId, ModelConstants.PRODUCES)
 })
 else List()
@@ -106,56 +104,74 @@ object CrossrefUtility {
 }
 private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
- var relList:List[Relation] = List()
+ var relList: List[Relation] = List()
 if (funders != null)
 funders.foreach(funder => {
 if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
 funder.DOI.get match {
 case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
- "10.13039/100010665" =>
- relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+ "10.13039/100010665" =>
+ relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
 case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
- relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+ relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
 case "10.13039/501100000781" =>
- relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
- relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result) - case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) - case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + case "10.13039/100000001" => + relList = relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result) + case "10.13039/501100001665" => + relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + case "10.13039/501100002341" => + relList = relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result) case "10.13039/501100001602" => - relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result) - case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result) + relList = + relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result) + case "10.13039/501100000923" => + relList = relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result) case "10.13039/501100000038" => - val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + val targetId = + IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) case "10.13039/501100000155" => - val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + val targetId = + IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) case "10.13039/501100000024" => - val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) - case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result) - case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result) - case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result) - case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result) + val targetId = + IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + case "10.13039/501100002848" => + relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result) + case 
"10.13039/501100003448" => + relList = relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result) + case "10.13039/501100010198" => + relList = relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result) + case "10.13039/501100004564" => + relList = relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result) case "10.13039/501100003407" => - relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result) - val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + relList = relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result) + val targetId = + IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) case "10.13039/501100006588" | "10.13039/501100004488" => - relList =relList ::: generateSimpleRelationFromAward( + relList = relList ::: generateSimpleRelationFromAward( funder, "irb_hr______", - a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result + a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), + result ) - case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result) - case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result) - case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result) + case "10.13039/501100006769" => + relList = relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result) + case "10.13039/501100001711" => + relList = relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result) + case "10.13039/501100004410" => + relList = relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result) case "10.13039/100004440" => - relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) - val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = + IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) case _ => logger.debug("no match for " + funder.DOI.get) } @@ -163,18 +179,19 @@ object CrossrefUtility { } else { funder.name match { case "European Union’s Horizon 2020 research and innovation program" => - relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) case "European Union's" => - relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result) - relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", 
extractECAward, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result) case "The French National Research Agency (ANR)" | "The French National Research Agency" => - relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result) case "CONICYT, Programa de Formación de Capital Humano Avanzado" => - relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result) + relList = relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result) case "Wellcome Trust Masters Fellowship" => - relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) - val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) - relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) + relList = relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result) + val targetId = + IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false) + relList = relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES)) case _ => logger.debug("no match for " + funder.name) } @@ -185,11 +202,7 @@ object CrossrefUtility { } - - - - - private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = { + private def mappingResult(result: Result, json: JValue, cobjCategory: String, className: String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID @@ -236,7 +249,9 @@ object CrossrefUtility { } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val subtitles = for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( - title, ModelConstants.SUBTITLE_QUALIFIER) + title, + ModelConstants.SUBTITLE_QUALIFIER + ) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION @@ -302,28 +317,52 @@ object CrossrefUtility { if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => - OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null) - ).asJava) + subjectList + .map(s => + OafMapperUtils.subject( + s, + OafMapperUtils.qualifier( + ModelConstants.DNET_SUBJECT_KEYWORD, + ModelConstants.DNET_SUBJECT_KEYWORD, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ), + null + ) + ) + .asJava + ) } //Mapping Author - val authorList:List[CrossrefAuthor] = + val authorList: List[CrossrefAuthor] = for { - JObject(author) <- json \ "author" - JField("ORCID", JString(orcid)) <- author - JField("given", JString(givenName)) <- author - JField("family", JString(familyName)) <- author - JField("sequence", JString(sequence)) <- author - } yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0) + JObject(author) <- json \ "author" + JField("ORCID", JString(orcid)) <- author + JField("given", JString(givenName)) <- author + JField("family", JString(familyName)) <- author + JField("sequence", JString(sequence)) <- author + } yield CrossrefAuthor( + givenName = givenName, + familyName = familyName, + ORCID = orcid, + sequence = sequence, + 
rank = 0 + ) - result.setAuthor(authorList.sortWith((a,b) =>{ - if (a.sequence.equalsIgnoreCase("first")) - true - else if (b.sequence.equalsIgnoreCase("first")) - false - else a.familyName< b.familyName - }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava) + result.setAuthor( + authorList + .sortWith((a, b) => { + if (a.sequence.equalsIgnoreCase("first")) + true + else if (b.sequence.equalsIgnoreCase("first")) + false + else a.familyName < b.familyName + }) + .zipWithIndex + .map(k => k._1.copy(rank = k._2)) + .map(k => generateAuthor(k)) + .asJava + ) // Mapping instance val instance = new Instance() @@ -360,7 +399,7 @@ object CrossrefUtility { ) } - if (instance.getLicense!= null) + if (instance.getLicense != null) instance.setAccessright( decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance) ) @@ -392,7 +431,7 @@ object CrossrefUtility { val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct //Mapping book if (className.toLowerCase.contains("book")) { - val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn + val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn if (ISBN.nonEmpty && containerTitles.nonEmpty) { val source = s"${containerTitles.head} ISBN: ${ISBN.head}" if (result.getSource != null) { @@ -404,8 +443,8 @@ object CrossrefUtility { } else { // Mapping Journal val issnInfos = for { - JObject(issn_type) <- json \ "issn-type" - JField("type", JString(tp)) <- issn_type + JObject(issn_type) <- json \ "issn-type" + JField("type", JString(tp)) <- issn_type JField("value", JString(vl)) <- issn_type } yield Tuple2(tp, vl) @@ -418,7 +457,7 @@ object CrossrefUtility { issnInfos.foreach(tp => { tp._1 match { case "electronic" => journal.setIssnOnline(tp._2) - case "print" => journal.setIssnPrinted(tp._2) + case "print" => journal.setIssnPrinted(tp._2) } }) } @@ -435,7 +474,6 @@ object CrossrefUtility { } } - result.setInstance(List(instance).asJava) result.setId("ID") result.setId(IdentifierFactory.createIdentifier(result, true)) @@ -453,16 +491,16 @@ object CrossrefUtility { //CC licenses if ( license.startsWith("cc") || - license.startsWith("http://creativecommons.org/licenses") || - license.startsWith("https://creativecommons.org/licenses") || + license.startsWith("http://creativecommons.org/licenses") || + license.startsWith("https://creativecommons.org/licenses") || - //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) - license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || - license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || - license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || - //APA (considered OPEN also by Unpaywall) - license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") ) { val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() @@ -481,11 +519,11 @@ object CrossrefUtility { try { val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) if 
(((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { - val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() + val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT() oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) return oaq } else { - return ModelConstants.EMBARGOED_ACCESS_RIGHT() + return ModelConstants.EMBARGOED_ACCESS_RIGHT() } } catch { case _: Exception => { @@ -511,7 +549,6 @@ object CrossrefUtility { ModelConstants.CLOSED_ACCESS_RIGHT() } - private def extractDate(dt: String, datePart: List[List[Int]]): String = { if (StringUtils.isNotBlank(dt)) return GraphCleaningFunctions.cleanDate(dt) @@ -533,11 +570,11 @@ object CrossrefUtility { } private def generateDate( - dt: String, - datePart: List[List[Int]], - classId: String, - schemeId: String - ): StructuredProperty = { + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { val dp = extractDate(dt, datePart) if (StringUtils.isNotBlank(dp)) structuredProperty(dp, classId, classId, schemeId) @@ -552,9 +589,9 @@ object CrossrefUtility { vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname resourceType match { - case "publication" => (new Publication, resourceType, term.getClassname) - case "dataset" => (new Dataset, resourceType, term.getClassname) - case "software" => (new Software, resourceType, term.getClassname) + case "publication" => (new Publication, resourceType, term.getClassname) + case "dataset" => (new Dataset, resourceType, term.getClassname) + case "software" => (new Software, resourceType, term.getClassname) case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname) } } else @@ -570,7 +607,15 @@ object CrossrefUtility { if (StringUtils.isNotBlank(ca.ORCID)) a.setPid( List( - OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null) + OafMapperUtils.authorPid( + ca.ORCID, + OafMapperUtils.qualifier( + ModelConstants.ORCID_PENDING, + ModelConstants.ORCID_PENDING, + ModelConstants.DNET_PID_TYPES + ), + null + ) ).asJava ) a diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index aa920ff6c..6e9675f20 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -7,8 +7,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.common.ModelSupport; import org.apache.commons.io.FileUtils; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -29,8 +27,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 89bdf0982..98ef2b9b8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -16,10 +17,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Country; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class PropagationConstant { @@ -44,6 +43,7 @@ public class PropagationConstant { public final static String NULL = "NULL"; + public final static float PROPAGATION_TRUST = 0.85f; public static final String INSTITUTIONAL_REPO_TYPE = "institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; @@ -90,54 +90,22 @@ public class PropagationConstant { Country nc = new Country(); nc.setClassid(classid); nc.setClassname(classname); - nc.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); nc.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); nc .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, - PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + OafMapperUtils + .dataInfo( + PROPAGATION_TRUST, + PROPAGATION_DATA_INFO_TYPE, + true, + OafMapperUtils + .qualifier( + PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, + PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); return nc; } - public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { - - return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, "0.85"); - } - - public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, - String trust) { - return getDataInfo( - inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true); - - } - - public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, - String trust, boolean inferred) { - DataInfo di = new DataInfo(); - di.setInferred(inferred); - di.setDeletedbyinference(false); - di.setTrust(trust); - di.setInferenceprovenance(inference_provenance); - di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); - return di; - } - - public static Qualifier getQualifier(String inference_class_id, String inference_class_name, - String qualifierSchema) { - Qualifier pa = new Qualifier(); - pa.setClassid(inference_class_id); - pa.setClassname(inference_class_name); - pa.setSchemeid(qualifierSchema); - pa.setSchemename(qualifierSchema); - return pa; - } - public static ArrayList<Relation> getOrganizationRelationPair(String orgId, String resultId, String classID, @@ -186,11 +154,18 @@ public class PropagationConstant {
r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r + Provenance p = new Provenance(); + p .setDataInfo( - getDataInfo( - inference_provenance, inference_class_id, inference_class_name, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + OafMapperUtils + .dataInfo( + PROPAGATION_TRUST, inference_provenance, true, + OafMapperUtils + .qualifier( + inference_class_id, inference_class_name, + ModelConstants.DNET_PROVENANCE_ACTIONS))); + r.setProvenance(Arrays.asList(p)); + return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 0452a6ebf..feba09281 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -15,8 +14,6 @@ import com.google.gson.Gson; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -173,45 +170,39 @@ public class ResultTagger implements Serializable { .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( - CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, - DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS))); if (datasources.contains(cId)) dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( - CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, - DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, + DNET_PROVENANCE_ACTIONS))); if (czenodo.contains(cId)) dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( - CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, - DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS))); if (aconstraints.contains(cId)) dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + DNET_PROVENANCE_ACTIONS))); } }); @@ -235,45 +226,40 @@ public class ResultTagger implements Serializable { .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( - CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, - DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, + DNET_PROVENANCE_ACTIONS))); if (datasources.contains(c)) 
dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + DNET_PROVENANCE_ACTIONS))); if (czenodo.contains(c)) dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( - CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, - DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS))); if (aconstraints.contains(c)) dataInfoList .add( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, BULKTAG_DATA_INFO_TYPE, true, OafMapperUtils .qualifier( CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST)); + DNET_PROVENANCE_ACTIONS))); context.setDataInfo(dataInfoList); return context; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index aea21f8e5..7e862f548 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -20,5 +20,5 @@ public class TaggingConstants { public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo"; public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints"; - public static final String TAGGING_TRUST = "0.8"; + public static final float TAGGING_TRUST = 0.8f; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscBulkTag.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscBulkTag.java index c4b2122b4..ddbb4b71d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscBulkTag.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscBulkTag.java @@ -29,9 +29,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bulktag.SparkBulkTagJob; import eu.dnetlib.dhp.bulktag.community.*; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -132,12 +130,13 @@ public class SparkEoscBulkTag implements Serializable { .asList( OafMapperUtils .dataInfo( - false, BULKTAG_DATA_INFO_TYPE, true, false, + TAGGING_TRUST, + BULKTAG_DATA_INFO_TYPE, + true, OafMapperUtils .qualifier( CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), - TAGGING_TRUST))); + DNET_PROVENANCE_ACTIONS)))); value.getContext().add(context); }
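The dataInfo(...) and qualifier(...) calls rewritten throughout this patch all share one shape: trust first (now a float), then the inference provenance, the inferred flag, and a single provenance-action Qualifier. As a reading aid, here is a minimal sketch of factory methods with that shape, inferred purely from the call sites in this patch; signatures and setter names are assumptions, not the actual OafMapperUtils sources:

    // Sketch only: inferred from the call sites above; Qualifier and DataInfo
    // are the model classes from eu.dnetlib.dhp.schema.oaf.
    public static Qualifier qualifier(String classid, String classname, String schemeid) {
        Qualifier q = new Qualifier();
        q.setClassid(classid); // e.g. CLASS_ID_DATASOURCE
        q.setClassname(classname); // e.g. CLASS_NAME_BULKTAG_DATASOURCE
        q.setSchemeid(schemeid); // e.g. DNET_PROVENANCE_ACTIONS; the duplicated schemename argument is gone
        return q;
    }

    public static DataInfo dataInfo(float trust, String inferenceprovenance, boolean inferred, Qualifier provenanceaction) {
        DataInfo di = new DataInfo();
        di.setTrust(trust); // e.g. TAGGING_TRUST = 0.8f, PROPAGATION_TRUST = 0.85f
        di.setInferenceprovenance(inferenceprovenance); // e.g. BULKTAG_DATA_INFO_TYPE
        di.setInferred(inferred);
        di.setProvenanceaction(provenanceaction);
        return di;
    }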
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscTag.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscTag.java index c131399cc..9dca82547 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscTag.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/eosc/SparkEoscTag.java @@ -221,7 +221,7 @@ public class SparkEoscTag { return words; } - private static Set<String> getWordsF(List<Field<String>> elem) { + private static Set<String> getWordsF(List<String> elem) { Set<String> words = new HashSet<>(); Optional .ofNullable(elem) @@ -230,7 +230,7 @@ public class SparkEoscTag { .forEach( t -> words .addAll( - Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))))); + Arrays.asList(t.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))))); return words; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b9f3bff52..31970b3bf 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -96,8 +96,7 @@ public class PrepareDatasourceCountryAssociation { // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class) .filter( - (FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && - !rel.getDataInfo().getDeletedbyinference()); + (FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY)); // filtering of the organization taking only the non deleted by inference and those with information about the // country diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index d9f6433a0..ef246261c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -23,6 +23,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; public class SparkCountryPropagationJob { @@ -126,6 +127,7 @@ public class SparkCountryPropagationJob { .filter(c -> !finalCountries.contains(c.getClassid())) .map(c -> getCountry(c.getClassid(), c.getClassname())) .collect(Collectors.toList()); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index a38b4da2e..2f04c3898 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -24,8 +24,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.AuthorPid; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; public class SparkOrcidToResultFromSemRelJob { @@ -171,21 +173,26 @@ public class SparkOrcidToResultFromSemRelJob { } } if (toaddpid) { - StructuredProperty p = new StructuredProperty(); + AuthorPid p = new AuthorPid(); p.setValue(autoritative_author.getOrcid()); p .setQualifier( - getQualifier( - ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); + OafMapperUtils + .qualifier( + ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, + ModelConstants.DNET_PID_TYPES)); p .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + OafMapperUtils + .dataInfo( + PROPAGATION_TRUST, + PROPAGATION_DATA_INFO_TYPE, true, OafMapperUtils + .qualifier( + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); - Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid()); + Optional<List<AuthorPid>> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { authorPid.get().add(p); } else { @@ -197,7 +204,7 @@ public class SparkOrcidToResultFromSemRelJob { } private static boolean containsAllowedPid(Author a) { - Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid()); + Optional<List<AuthorPid>> pids = Optional.ofNullable(a.getPid()); if (!pids.isPresent()) { return false; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 50df08f8c..e9e47dc28 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -24,6 +24,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -126,16 +128,20 @@ public class SparkResultToCommunityFromOrganizationJob { .setDataInfo( Arrays .asList( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + OafMapperUtils + .dataInfo( + PROPAGATION_TRUST, + PROPAGATION_DATA_INFO_TYPE, true, + OafMapperUtils + .qualifier( + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)))); propagatedContexts.add(newContext); } } res.setContext(propagatedContexts); - ret.mergeFrom(res); + ret = MergeUtils.merge(ret, res); } return ret; };
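One behavioural detail in the last hunk above deserves a call-out: the in-place Result.mergeFrom(...) is replaced by the static MergeUtils.merge(...) from the first commit, and the merged entity is the return value, so it must be reassigned. A minimal sketch of the difference:

    // Old style: mutated ret in place.
    // ret.mergeFrom(res);

    // New style: merge(...) returns the combined entity; without the
    // reassignment the propagated community contexts would be lost.
    ret = MergeUtils.merge(ret, res);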
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index f31a26230..4ced60112 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -21,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; public class SparkResultToCommunityThroughSemRelJob { @@ -122,11 +124,14 @@ public class SparkResultToCommunityThroughSemRelJob { .setDataInfo( Arrays .asList( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + OafMapperUtils + .dataInfo( + PROPAGATION_TRUST, PROPAGATION_DATA_INFO_TYPE, true, + OafMapperUtils + .qualifier( + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)))); return newContext; } return null; @@ -139,7 +144,7 @@ public class SparkResultToCommunityThroughSemRelJob { r.setId(ret.getId()); r.setContext(contextList); - ret.mergeFrom(r); + ret = MergeUtils.merge(ret, r); } return ret; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java index 23909fd9a..97e46ddec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -115,8 +115,7 @@ public class PrepareInfo implements Serializable { relation .filter( - (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() && - r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) + (FilterFunction<Relation>) r -> r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java index 1adbbe60e..ff0b4aa29 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -126,10 +126,14 @@ public class StepActions implements Serializable { .stream() .filter( rel -> !rel - .getDataInfo() - .getProvenanceaction() - .getClassid() - .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) + .getProvenance() + .stream() + .anyMatch( + p -> p + .getDataInfo() + .getProvenanceaction() + .getClassid() + .equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID))) .count() > 0) { return null; }
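With relation provenance now modelled as a list of Provenance entries, the StepActions filter above boils down to the predicate below, shown as a sketch with a hypothetical helper name (alreadyPropagated) and assuming getProvenance() is non-null at this point:

    // True when any provenance entry of the relation was produced by this
    // very propagation step, i.e. carries its provenance-action class id.
    private static boolean alreadyPropagated(Relation rel) {
        return rel
            .getProvenance()
            .stream()
            .anyMatch(
                p -> PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID
                    .equals(p.getDataInfo().getProvenanceaction().getClassid()));
    }

The surrounding code returns null, i.e. skips the update, as soon as the group contains a relation whose provenance did not come from this propagation, mirroring the old single-DataInfo comparison.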
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index cdb1bbb15..3a1f5b616 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable { def generateUpdates(spark: SparkSession): Unit = { val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString - val pids: List[String] = template.linesWithSeparators.map(l =>l.stripLineEnd) + val pids: List[String] = template.linesWithSeparators + .map(l => l.stripLineEnd) .map { id => val r = new Result r.setId(id.toLowerCase.trim) @@ -126,7 +127,7 @@ class ResolveEntitiesTest extends Serializable { entities.foreach { e => val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString spark - .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l =>l.stripLineEnd).toList)) + .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l => l.stripLineEnd).toList)) .as[String] .write .option("compression", "gzip") @@ -263,7 +264,8 @@ class ResolveEntitiesTest extends Serializable { Source .fromInputStream(this.getClass.getResourceAsStream(s"publication")) .mkString - .linesWithSeparators.map(l =>l.stripLineEnd) + .linesWithSeparators + .map(l => l.stripLineEnd) .next(), classOf[Publication] ) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index 0ea908290..b838ae065 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -47,7 +47,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { val inputRelations = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")) .mkString - val items = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd).toList + val items = inputRelations.linesWithSeparators.map(l => l.stripLineEnd).toList assertNotNull(items) items.foreach(i => assertTrue(i.nonEmpty)) val result = @@ -69,7 +69,8 @@ class ScholixGraphTest extends AbstractVocabularyTest { getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") ) .mkString - val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd) + val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators + .map(l => l.stripLineEnd) .sliding(2) .map(s => (s.head, s(1))) .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) From eca7ebab6d10674776b9038dcf3514ff166941b8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 15 Feb 2023 16:41:28 +0100 Subject: [PATCH 3/3] [enrichment] adding relations in one side only --- .../SparkResultToProjectThroughSemRelJob.java | 35 ++++++------------- ...arkResultToOrganizationFromIstRepoJob.java | 10 +++--- .../SparkResultToOrganizationFromSemRel.java | 13 ------- 3 files changed, 17 insertions(+), 41 deletions(-)
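Before the diffs of this third commit, a quick orientation: until now each propagation job materialized both directions of every inferred link; from this commit on only one canonical direction is written. The kept and dropped pairs, summarized as a hypothetical lookup that only restates what the diffs below change (relation classes from ModelConstants):

    // Hypothetical summary table, not part of the patch:
    // kept direction -> inverse that is no longer materialized.
    static final java.util.Map<String, String> KEPT_TO_DROPPED_INVERSE = java.util.Map.of(
        ModelConstants.PRODUCES, ModelConstants.IS_PRODUCED_BY, // project -> result kept
        ModelConstants.HAS_AUTHOR_INSTITUTION, ModelConstants.IS_AUTHOR_INSTITUTION_OF // result -> organization kept
    );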
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1ec521af1..a5868ffbd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -107,30 +107,17 @@ public class SparkResultToProjectThroughSemRelJob { potentialUpdate .getProjectSet() .forEach( - projectId -> { - newRelations - .add( - getRelation( - resId, - projectId, - ModelConstants.IS_PRODUCED_BY, - ModelConstants.RESULT_PROJECT, - ModelConstants.OUTCOME, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, - PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - newRelations - .add( - getRelation( - projectId, - resId, - ModelConstants.PRODUCES, - ModelConstants.RESULT_PROJECT, - ModelConstants.OUTCOME, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, - PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - }); + projectId -> newRelations + .add( + getRelation( + projectId, + resId, + ModelConstants.PRODUCES, + ModelConstants.RESULT_PROJECT, + ModelConstants.OUTCOME, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME))); return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0757ebccd..55bc73e83 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -139,10 +139,12 @@ public class SparkResultToOrganizationFromIstRepoJob { organizations .forEach( orgId -> newRelations - .addAll( - getOrganizationRelationPair( - orgId, - resultId, + .add( + getRelation( + resultId, orgId, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java index cfc69a8f0..91d28ef34 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -211,19 +211,6 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .groupByKey((MapFunction<Relation, String>) r -> r.getSource() + r.getTarget(), Encoders.STRING()) .mapGroups( (MapGroupsFunction<String, Relation, Relation>) (k, it) -> it.next(), Encoders.bean(Relation.class)) - .flatMap( - (FlatMapFunction<Relation, Relation>) r -> Arrays - .asList( - r, getRelation( - r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF, -
ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) - .iterator() - - , Encoders.bean(Relation.class)) .write() .mode(SaveMode.Append)
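To close the series: the surviving getRelation(...) calls produce a single directed relation whose provenance uses the model introduced in the first commit. Putting the pieces of this patch together, the emitted relation amounts to the following sketch (field names taken from the PropagationConstant hunks above; illustrative, not the exact helper body):

    // Sketch of the single materialized direction for project -> result.
    Relation r = new Relation();
    r.setSource(projectId);
    r.setTarget(resId);
    r.setRelClass(ModelConstants.PRODUCES);
    r.setRelType(ModelConstants.RESULT_PROJECT);
    r.setSubRelType(ModelConstants.OUTCOME);
    Provenance p = new Provenance();
    p.setDataInfo(
        OafMapperUtils.dataInfo(
            PROPAGATION_TRUST, PROPAGATION_DATA_INFO_TYPE, true,
            OafMapperUtils.qualifier(
                PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
                PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME,
                ModelConstants.DNET_PROVENANCE_ACTIONS)));
    r.setProvenance(Arrays.asList(p));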