diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index d2ef9fa7b..dc38f218f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -7,7 +7,8 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; public class ModelConstants { - private ModelConstants() {} + private ModelConstants() { + } public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; @@ -80,7 +81,6 @@ public class ModelConstants { public static final String PROVENANCE_DEDUP = "sysimport:dedup"; public static final String PROVENANCE_ENRICH = "sysimport:enrich"; - public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier( SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS); @@ -127,8 +127,6 @@ public class ModelConstants { public static final String IS_REQUIRED_BY = "IsRequiredBy"; public static final String REQUIRES = "Requires"; - - public static final String CITATION = "citation"; // subreltype public static final String CITES = "Cites"; public static final String IS_CITED_BY = "IsCitedBy"; @@ -219,7 +217,7 @@ public class ModelConstants { "main title", "main title", DNET_DATACITE_TITLE); public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier( - "alternative title", "alternative title", DNET_DATACITE_TITLE); + "alternative title", "alternative title", DNET_DATACITE_TITLE); private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java index 3ea391bd4..3ee3ed5a1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java @@ -209,7 +209,8 @@ public class ModelSupport { return idPrefixMap.get(clazz); } - public static Boolean sameClass(X left, Y right, Class superClazz) { + public static Boolean sameClass(X left, Y right, + Class superClazz) { return isSubClass(left, superClazz) && isSubClass(right, superClazz); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index cc6e10d81..ae275681d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -31,10 +31,10 @@ public class MergeUtils { return mergeRelation(left, right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -53,10 +53,10 @@ public class MergeUtils { return mergeProject(left, right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -110,8 +110,8 @@ public class MergeUtils { mergedEntity.setLastupdatetimestamp(enrich.getLastupdatetimestamp()); } else if (mergedEntity.getLastupdatetimestamp() != null && enrich.getLastupdatetimestamp() != null) { mergedEntity - .setLastupdatetimestamp( - Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp())); + .setLastupdatetimestamp( + Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp())); } mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid())); @@ -138,7 +138,7 @@ public class MergeUtils { checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal"); checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal"); checkArgument( - Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); + Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal"); original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance())); @@ -148,10 +148,10 @@ public class MergeUtils { original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate())); } catch (ParseException e) { throw new IllegalArgumentException(String - .format( - "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(), - original.getTarget(), - original.getValidationDate())); + .format( + "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(), + original.getTarget(), + original.getValidationDate())); } return (T) original; @@ -370,7 +370,7 @@ public class MergeUtils { private static T mergePublication(T original, T enrich) { - //add publication specific fields. + // add publication specific fields. mergeEntityDataInfo(original, enrich); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index ff16cf4d8..723254bab 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -363,7 +363,8 @@ public class OafMapperUtils { final Entity entity, final String validationDate) { - final List provenance = getProvenance(entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo())); + final List provenance = getProvenance( + entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo())); return getRelation( source, target, relType, subRelType, relClass, provenance, validationDate, null); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java index 6529d43da..0650dc53b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java @@ -1,8 +1,13 @@ + package eu.dnetlib.dhp.common.vocabulary; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -12,73 +17,63 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -import static org.mockito.Mockito.lenient; - +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class VocabularyTest { + @Mock + protected ISLookUpService isLookUpService; - @Mock - protected ISLookUpService isLookUpService; + protected VocabularyGroup vocabularies; - protected VocabularyGroup vocabularies; + @BeforeEach + public void setUpVocabulary() throws ISLookUpException, IOException { - @BeforeEach - public void setUpVocabulary() throws ISLookUpException, IOException { + lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); - lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); + lenient() + .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) + .thenReturn(synonyms()); + vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService); + } - lenient() - .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) - .thenReturn(synonyms()); - vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService); - } + private static List vocs() throws IOException { + return IOUtils + .readLines( + Objects + .requireNonNull( + VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt"))); + } - private static List vocs() throws IOException { - return IOUtils - .readLines( - Objects - .requireNonNull( - VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt"))); - } + private static List synonyms() throws IOException { + return IOUtils + .readLines( + Objects + .requireNonNull( + VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt"))); + } - private static List synonyms() throws IOException { - return IOUtils - .readLines( - Objects - .requireNonNull( - VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt"))); - } + @Test + void testVocabularyMatch() throws Exception { + final String s = IOUtils.toString(this.getClass().getResourceAsStream("terms")); + for (String s1 : s.split("\n")) { - @Test - void testVocabularyMatch () throws Exception{ - final String s= IOUtils.toString(this.getClass().getResourceAsStream("terms")); + final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1); - for (String s1 : s.split("\n")) { + if (t1 == null) { + System.err.println(s1 + " Missing"); + } else { + System.out.println("syn=" + s1 + " term = " + t1.getClassid()); - final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1); + System.out + .println( + vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname()); + } + } - if (t1 == null) { - System.err.println(s1+ " Missing"); - } - else { - System.out.println("syn=" + s1 + " term = " + t1.getClassid()); - - - System.out.println(vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname()); - } - } - - - - - - } + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala index 28ea64c9b..4d81b4858 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala @@ -13,8 +13,8 @@ import org.json4s.jackson.JsonMethods.parse import scala.collection.JavaConverters._ - case class CrossrefDT(doi: String, json: String, timestamp: Long) {} + object CrossrefUtility { val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" val DOI_PREFIX = "10." @@ -37,7 +37,6 @@ object CrossrefUtility { ret } - def extractDate(dt: String, datePart: List[List[Int]]): String = { if (StringUtils.isNotBlank(dt)) return GraphCleaningFunctions.cleanDate(dt) @@ -60,36 +59,35 @@ object CrossrefUtility { } private def generateDate( - dt: String, - datePart: List[List[Int]], - classId: String, - schemeId: String - ): StructuredProperty = { + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { val dp = extractDate(dt, datePart) if (StringUtils.isNotBlank(dp)) - structuredProperty(dp, classId, classId,schemeId) + structuredProperty(dp, classId, classId, schemeId) else null } - - private def generateItemFromType(objectType: String, vocabularies:VocabularyGroup): (Result, String) = { + private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = { val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType) if (term != null) { - val resourceType = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname + val resourceType = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname resourceType match { - case "publication" =>(new Publication, resourceType) - case "dataset" =>(new Dataset, resourceType) - case "software" => (new Software, resourceType) - case "otherresearchproduct" =>(new OtherResearchProduct, resourceType) + case "publication" => (new Publication, resourceType) + case "dataset" => (new Dataset, resourceType) + case "software" => (new Software, resourceType) + case "otherresearchproduct" => (new OtherResearchProduct, resourceType) } } else null } - - def convert(input: String, vocabularies:VocabularyGroup): List[Oaf] = { + def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -124,14 +122,13 @@ object CrossrefUtility { result match { case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) + case dataset: Dataset => convertDataset(dataset) } resultList = resultList ::: List(result) resultList } - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -140,8 +137,9 @@ object CrossrefUtility { result.setPid( List( - structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES) - ).asJava) + structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES) + ).asJava + ) //MAPPING Crossref DOI into OriginalId //and Other Original Identifier of dataset like clinical-trial-number @@ -149,11 +147,10 @@ object CrossrefUtility { val alternativeIds: List[String] = for (JString(ids) <- json \ "alternative-id") yield ids val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi) - result.setOriginalId(tmp.filter(id => id != null).asJava) // Add DataInfo - result.setDataInfo(dataInfo(false, false,0.9F,null, false,ModelConstants.REPOSITORY_PROVENANCE_ACTIONS)) + result.setDataInfo(dataInfo(false, false, 0.9f, null, false, ModelConstants.REPOSITORY_PROVENANCE_ACTIONS)) result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long]) result.setDateofcollection((json \ "indexed" \ "date-time").extract[String]) @@ -167,23 +164,26 @@ object CrossrefUtility { // TITLE val mainTitles = - for {JString(title) <- json \ "title" if title.nonEmpty} - yield - structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER) + for { JString(title) <- json \ "title" if title.nonEmpty } yield structuredProperty( + title, + ModelConstants.MAIN_TITLE_QUALIFIER + ) val originalTitles = for { JString(title) <- json \ "original-title" if title.nonEmpty } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val shortTitles = for { JString(title) <- json \ "short-title" if title.nonEmpty - } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) + } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) val subtitles = - for {JString(title) <- json \ "subtitle" if title.nonEmpty} - yield structuredProperty(title, ModelConstants.SUBTITLE_QUALIFIER) + for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( + title, + ModelConstants.SUBTITLE_QUALIFIER + ) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) // DESCRIPTION val descriptionList = - for {JString(description) <- json \ "abstract"} yield description + for { JString(description) <- json \ "abstract" } yield description result.setDescription(descriptionList.asJava) // Source @@ -242,11 +242,9 @@ object CrossrefUtility { //Mapping Subject val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List()) - - if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava + subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava ) } @@ -265,8 +263,8 @@ object CrossrefUtility { // Mapping instance val instance = new Instance() val license = for { - JObject(license) <- json \ "license" - JField("URL", JString(lic)) <- license + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license JField("content-version", JString(content_version)) <- license } yield (asField(lic), content_version) val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala index fd4bcd37d..0d45d1c83 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/GenerateCrossrefDataset.scala @@ -3,20 +3,20 @@ package eu.dnetlib.dhp.crossref import eu.dnetlib.dhp.application.AbstractScalaApplication import org.slf4j.{Logger, LoggerFactory} -class GenerateCrossrefDataset (propertyPath: String, args: Array[String], log: Logger) - extends AbstractScalaApplication(propertyPath, args, log: Logger) { +class GenerateCrossrefDataset(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + /** Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ + * where the whole logic of the spark node is defined + */ override def run(): Unit = ??? } - -object GenerateCrossrefDataset{ - val log:Logger = LoggerFactory.getLogger(getClass) - val propertyPath ="/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" +object GenerateCrossrefDataset { + val log: Logger = LoggerFactory.getLogger(getClass) + val propertyPath = "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" def main(args: Array[String]): Unit = { - new GenerateCrossrefDataset(propertyPath,args, log).initialize().run() + new GenerateCrossrefDataset(propertyPath, args, log).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 38a3350a0..afb687b37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -284,11 +284,11 @@ object DataciteToOAFTransformation { } def generateRelation( - sourceId: String, - targetId: String, - relClass: String, - collectedFrom: KeyValue, - di: DataInfo + sourceId: String, + targetId: String, + relClass: String, + collectedFrom: KeyValue, + di: DataInfo ): Relation = { val r = new Relation r.setSource(sourceId) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 091d48713..98a8c4c68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -360,10 +360,13 @@ object BioDBToOAF { val rel = new Relation - val provenance = OafMapperUtils.getProvenance(Lists.newArrayList( - collectedFrom, - collectedFromMap("pdb") - ), REL_DATA_INFO) + val provenance = OafMapperUtils.getProvenance( + Lists.newArrayList( + collectedFrom, + collectedFromMap("pdb") + ), + REL_DATA_INFO + ) rel.setProvenance(provenance)