From 2480e590d1c5b440c336febc51fa3f53270e3dbf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 14:25:23 +0100 Subject: [PATCH 01/15] [DOIBoost - Mapping] changed the type on which to map dissertation from Crossref: from 006 Doctoral thesis to 0044 Thesis since dissertation could be either Doctoral or master thesis --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 25f0ff381f..dc9e18fde5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -70,7 +70,7 @@ case object Crossref2Oaf { "reference-book" -> "0002 Book", "monograph" -> "0002 Book", "journal-article" -> "0001 Article", - "dissertation" -> "0006 Doctoral thesis", + "dissertation" -> "0044 Thesis", "other" -> "0038 Other literature type", "peer-review" -> "0015 Review", "proceedings" -> "0004 Conference object", From 779318961cd4e7a1b24990229caf74a1775230ac Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 14:38:52 +0100 Subject: [PATCH 02/15] [DOIBoost - Mapping] removed the url from crossref containing the api.elsevier.com... string in the url --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index dc9e18fde5..53a9e8bd46 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -207,7 +207,7 @@ case object Crossref2Oaf { instance.setDateofacceptance(asField(createdDate.getValue)) } val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct + val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && !p.contains("api.elsevier.com...")).distinct if (links.nonEmpty) { instance.setUrl(links.asJava) } From b2bb8d9d7908508f7d7abaf4d0d5d246fb38a42d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 15:44:57 +0100 Subject: [PATCH 03/15] [DOIBoost - Mapping] selecting the url from Crossref containing the doi --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 53a9e8bd46..51637f5bb5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -207,7 +207,7 @@ case object Crossref2Oaf { instance.setDateofacceptance(asField(createdDate.getValue)) } val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && !p.contains("api.elsevier.com...")).distinct + val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct if (links.nonEmpty) { instance.setUrl(links.asJava) } From 683fe093cff2a76fb42d27c52c5c61831f48c268 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 15:51:26 +0100 Subject: [PATCH 04/15] [DOIBoost - Mapping] Remove the addition of the instance to the MAG publication record --- .../doiboost/mag/SparkProcessMAG.scala | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index ecb389af87..c011cbd208 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -111,22 +111,24 @@ object SparkProcessMAG { .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/merge_step_2_conference") - - - magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] - - - logger.info("Phase 5) enrich publication with URL and Instances") - magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") - .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } - .write.mode(SaveMode.Overwrite) .save(s"$workingPath/merge_step_3") + //no more needed to add the instance to mag records +// magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] +// .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] +// +// val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] +// +// +// +// logger.info("Phase 5) enrich publication with URL and Instances") +// magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") +// .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } +// .write.mode(SaveMode.Overwrite) +// .save(s"$workingPath/merge_step_3") + + // logger.info("Phase 6) Enrich Publication with description") // val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] // pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") From 96769b44816bbebbeee2bf47c03e9079f83ea441 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:43:36 +0100 Subject: [PATCH 05/15] [DOIBoost - Mapping] Changed the logic which brought in in the instance urls that should not be there: The urld of the doi in the json is reachable from the root (json/"URL") other urls where added from the links element. Now the mapping from the link element has been removed --- .../dnetlib/doiboost/crossref/Crossref2Oaf.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 51637f5bb5..4c06d283a6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -206,11 +206,16 @@ case object Crossref2Oaf { else { instance.setDateofacceptance(asField(createdDate.getValue)) } - val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct - if (links.nonEmpty) { - instance.setUrl(links.asJava) - } + val s: List[String] = List((json \ "URL").extract[String]) +// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct +// if (links.nonEmpty) { +// instance.setUrl(links.asJava) +// } + if(s.nonEmpty) + { + instance.setUrl(s.asJava) + } + result.setInstance(List(instance).asJava) //IMPORTANT From d97ea82a2922846a2e03df84992df10cb479287c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:45:15 +0100 Subject: [PATCH 06/15] [DOIBoost Mapping] Added test to verify the instance created for Crossref will have just the url related to the doi --- .../crossref/CrossrefMappingTest.scala | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 75fb3f787a..f6d5e124ea 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -612,4 +612,26 @@ class CrossrefMappingTest { } + @Test + def testMultipleURLs() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("multiple_urls.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertEquals(1, item.getInstance().size()) + assertEquals(1, item.getInstance().get(0).getUrl().size()) + assertEquals("http://dx.doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + //println(mapper.writeValueAsString(item)) + + } + } From edf55395e97d92d9b8eb7c29184f0664b0ed7345 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:49:30 +0100 Subject: [PATCH 07/15] added test resourse --- .../doiboost/crossref/multiple_urls.json | 614 ++++++++++++++++++ 1 file changed, 614 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json new file mode 100644 index 0000000000..5f90feac41 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json @@ -0,0 +1,614 @@ + +{ +"indexed": { +"date-parts": [ +[ +2021, +10, +31 +] +], +"date-time": "2021-10-31T15:48:01Z", +"timestamp": 1635695281393 +}, +"reference-count": 39, +"publisher": "Elsevier BV", +"license": [ +{ +"start": { +"date-parts": [ +[ +2019, +12, +1 +] +], +"date-time": "2019-12-01T00:00:00Z", +"timestamp": 1575158400000 +}, +"content-version": "tdm", +"delay-in-days": 0, +"URL": "https://www.elsevier.com/tdm/userlicense/1.0/" +}, +{ +"start": { +"date-parts": [ +[ +2019, +9, +13 +] +], +"date-time": "2019-09-13T00:00:00Z", +"timestamp": 1568332800000 +}, +"content-version": "vor", +"delay-in-days": 0, +"URL": "http://creativecommons.org/licenses/by/4.0/" +} +], +"funder": [ +{ +"DOI": "10.13039/100001182", +"name": "INSTAP", +"doi-asserted-by": "publisher" +}, +{ +"DOI": "10.13039/100014440", +"name": "Ministry of Science, Innovation and Universities", +"doi-asserted-by": "publisher", +"award": [ +"RYC-2016-19637" +] +}, +{ +"DOI": "10.13039/100010661", +"name": "European Union’s Horizon 2020", +"doi-asserted-by": "publisher", +"award": [ +"746446" +] +} +], +"content-domain": { +"domain": [ +"elsevier.com", +"sciencedirect.com" +], +"crossmark-restriction": true +}, +"short-container-title": [ +"Journal of Archaeological Science" +], +"published-print": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"DOI": "10.1016/j.jas.2019.105013", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2019, +9, +25 +] +], +"date-time": "2019-09-25T20:05:08Z", +"timestamp": 1569441908000 +}, +"page": "105013", +"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy", +"source": "Crossref", +"is-referenced-by-count": 21, +"title": [ +"A brave new world for archaeological survey: Automated machine learning-based potsherd detection using high-resolution drone imagery" +], +"prefix": "10.1016", +"volume": "112", +"author": [ +{ +"given": "H.A.", +"family": "Orengo", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"given": "A.", +"family": "Garcia-Molsosa", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "78", +"reference": [ +{ +"key": "10.1016/j.jas.2019.105013_bib1", +"doi-asserted-by": "crossref", +"first-page": "85", +"DOI": "10.1080/17538947.2016.1250829", +"article-title": "Remote sensing heritage in a petabyte-scale: satellite data and heritage Earth Engine© applications", +"volume": "10", +"author": "Agapiou", +"year": "2017", +"journal-title": "Int. J. Digit. Earth" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib2", +"series-title": "Extracting Meaning from Ploughsoil Assemblages", +"first-page": "1", +"article-title": "Extracting meaning from ploughsoil assemblages: assessments of the past, strategies for the future", +"author": "Alcock", +"year": "2000" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib3", +"series-title": "Side-by-Side Survey. Comparative Regional Studies in the Mediterranean World", +"first-page": "1", +"article-title": "Introduction", +"author": "Alcock", +"year": "2004" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib4", +"doi-asserted-by": "crossref", +"first-page": "93", +"DOI": "10.1111/j.1538-4632.1995.tb00338.x", +"article-title": "Local indicators of spatial association—LISA", +"volume": "27", +"author": "Anselin", +"year": "1995", +"journal-title": "Geogr. Anal." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib5", +"series-title": "Archaeological Survey", +"author": "Banning", +"year": "2002" +}, +{ +"issue": "1/2", +"key": "10.1016/j.jas.2019.105013_bib6", +"doi-asserted-by": "crossref", +"first-page": "123", +"DOI": "10.2307/3181488", +"article-title": "GIS, archaeological survey and landscape archaeology on the island of Kythera, Greece", +"volume": "29", +"author": "Bevan", +"year": "2004", +"journal-title": "J. Field Archaeol." +}, +{ +"issue": "1", +"key": "10.1016/j.jas.2019.105013_bib8", +"doi-asserted-by": "crossref", +"first-page": "5", +"DOI": "10.1023/A:1010933404324", +"article-title": "Random forests", +"volume": "45", +"author": "Breiman", +"year": "2001", +"journal-title": "Mach. Learn." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib9", +"series-title": "Sampling in Contemporary British Archaeology", +"author": "Cherry", +"year": "1978" +}, +{ +"issue": "3", +"key": "10.1016/j.jas.2019.105013_bib10", +"doi-asserted-by": "crossref", +"first-page": "273", +"DOI": "10.1016/0734-189X(84)90197-X", +"article-title": "Segmentation of a high-resolution urban scene using texture operators", +"volume": "25", +"author": "Conners", +"year": "1984", +"journal-title": "Comput. Vis. Graph Image Process" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib11", +"first-page": "31", +"article-title": "Old land surfaces and modern ploughsoil: implications of recent work at Maxey, Cambridgeshire", +"volume": "2", +"author": "Crowther", +"year": "1983", +"journal-title": "Scott. Archaeol. Rev." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib12", +"series-title": "Settlement Pattern Studies in the Americas: Fifty Years since Virú", +"first-page": "203", +"article-title": "Conclusions: the settlement pattern concept from an Americanist perspective", +"author": "Fish", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib13", +"doi-asserted-by": "crossref", +"first-page": "21", +"DOI": "10.3390/geosciences9010021", +"article-title": "Remote sensing and historical morphodynamics of alluvial plains. The 1909 indus flood and the city of Dera Gazhi Khan (province of Punjab, Pakistan)", +"volume": "9", +"author": "Garcia", +"year": "2019", +"journal-title": "Geosciences" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib14", +"unstructured": "Georgiadis, M.; Garcia-Molsosa, A.; Orengo, H.A.; Kefalidou, E. and Kallintzi, K. In Preparation. APAX Project 2015-2018: A Preliminary Report. (Hesperia)." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib15", +"series-title": "Geographical Information Systems and Landscape Archaeology", +"first-page": "35", +"article-title": "Regional survey and GIS: the boeotia project", +"author": "Gillings", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib16", +"doi-asserted-by": "crossref", +"first-page": "18", +"DOI": "10.1016/j.rse.2017.06.031", +"article-title": "Google Earth engine: planetary-scale geospatial analysis for everyone", +"volume": "202", +"author": "Gorelick", +"year": "2017", +"journal-title": "Remote Sens. Environ." +}, +{ +"issue": "107", +"key": "10.1016/j.jas.2019.105013_bib17", +"doi-asserted-by": "crossref", +"first-page": "177", +"DOI": "10.1111/j.0031-868X.2004.00278.x", +"article-title": "Photogrammetric reconstruction of the great buddha of Bamiyan, Afghanistan", +"volume": "19", +"author": "Grün", +"year": "2004", +"journal-title": "Photogramm. Rec." +}, +{ +"issue": "6", +"key": "10.1016/j.jas.2019.105013_bib18", +"doi-asserted-by": "crossref", +"first-page": "610", +"DOI": "10.1109/TSMC.1973.4309314", +"article-title": "Textural features for image classification", +"author": "Haralick", +"year": "1973", +"journal-title": "IEEE Trans. Syst., Man, Cybernet., SMC-3" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib19", +"doi-asserted-by": "crossref", +"first-page": "76", +"DOI": "10.1558/jmea.v14i1.76", +"article-title": "Excavating to excess? Implications of the last decade of archaeology in Israel", +"volume": "14", +"author": "Kletter", +"year": "2001", +"journal-title": "J. Mediterr. Archaeol." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib20", +"first-page": "299", +"article-title": "Testing Google Earth Engine for the automatic identification and vectorization of archaeological features: a case study from Faynan, Jordan", +"volume": "15", +"author": "Liss", +"year": "2017", +"journal-title": "J. Archaeol. Sci.: Report" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib21", +"series-title": "Geographical Information Systems and Landscape Archaeology", +"first-page": "55", +"article-title": "Towards a methodology for modelling surface survey data: the sangro valley project", +"author": "Lock", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib22", +"series-title": "Extracting Meaning from Ploughsoil Assemblages", +"first-page": "5", +"article-title": "Methods of collection recording and quantification", +"author": "Mattingly", +"year": "2000" +}, +{ +"issue": "14", +"key": "10.1016/j.jas.2019.105013_bib23", +"doi-asserted-by": "crossref", +"first-page": "E778", +"DOI": "10.1073/pnas.1115472109", +"article-title": "Mapping patterns of long-term settlement in Northern Mesopotamia at a large scale", +"volume": "109", +"author": "Menze", +"year": "2012", +"journal-title": "Proc. Natl. Acad. Sci." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib24", +"doi-asserted-by": "crossref", +"first-page": "80", +"DOI": "10.1016/j.jas.2015.04.002", +"article-title": "A supervised machine-learning approach towards geochemical predictive modelling in archaeology", +"volume": "59", +"author": "Oonk", +"year": "2015", +"journal-title": "J. Archaeol. Sci." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib25", +"doi-asserted-by": "crossref", +"first-page": "49", +"DOI": "10.1016/j.isprsjprs.2012.07.005", +"article-title": "Combining terrestrial stereophotogrammetry, DGPS and GIS-based 3D voxel modelling in the volumetric recording of archaeological features", +"volume": "76", +"author": "Orengo", +"year": "2013", +"journal-title": "ISPRS J. Photogrammetry Remote Sens." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib26", +"doi-asserted-by": "crossref", +"first-page": "100", +"DOI": "10.1016/j.jas.2015.10.008", +"article-title": "Photogrammetric re-discovery of the Eastern Thessalian hidden long-term landscapes", +"volume": "64", +"author": "Orengo", +"year": "2015", +"journal-title": "J. Archaeol. Sci." +}, +{ +"issue": "3", +"key": "10.1016/j.jas.2019.105013_bib27", +"doi-asserted-by": "crossref", +"first-page": "479", +"DOI": "10.3764/aja.122.3.0479", +"article-title": "Towards a definition of Minoan agro-pastoral landscapes: results of the survey at Palaikastro (Crete)", +"volume": "122", +"author": "Orengo", +"year": "2018", +"journal-title": "Am. J. Archaeol." +}, +{ +"issue": "7", +"key": "10.1016/j.jas.2019.105013_bib28", +"doi-asserted-by": "crossref", +"first-page": "735", +"DOI": "10.3390/rs9070735", +"article-title": "Large-scale, multi-temporal remote sensing of palaeo-river networks: a case study from Northwest India and its implications for the Indus civilisation", +"volume": "9", +"author": "Orengo", +"year": "2017", +"journal-title": "Remote Sens." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib29", +"doi-asserted-by": "crossref", +"first-page": "1361", +"DOI": "10.1002/esp.4317", +"article-title": "Multi-scale relief model (MSRM): a new algorithm for the visualization of subtle topographic change of variable size in digital elevation models", +"volume": "43", +"author": "Orengo", +"year": "2018", +"journal-title": "Earth Surf. Process. Landforms" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib30", +"series-title": "Submitted to Proceedings of the National Academy of Sciences", +"article-title": "Living on the edge of the desert: automated detection of archaeological mounds in Cholistan (Pakistan) using machine learning classification of multi-sensor multi-temporal satellite data", +"author": "Orengo", +"year": "2019" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib31", +"first-page": "154", +"article-title": "How many trees in a random forest?", +"volume": "vol. 7376", +"author": "Oshiro", +"year": "2012" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib32", +"article-title": "Decision-making in modern surveys", +"volume": "ume 1", +"author": "Plog", +"year": "1978" +}, +{ +"issue": "4", +"key": "10.1016/j.jas.2019.105013_bib33", +"doi-asserted-by": "crossref", +"first-page": "100", +"DOI": "10.3390/geosciences7040100", +"article-title": "From above and on the ground: geospatial methods for recording endangered archaeology in the Middle East and north africa", +"volume": "7", +"author": "Rayne", +"year": "2017", +"journal-title": "Geosciences" +}, +{ +"issue": "1", +"key": "10.1016/j.jas.2019.105013_bib34", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.1080/00438243.1978.9979712", +"article-title": "The design of archaeological surveys", +"volume": "10", +"author": "Schiffer", +"year": "1978", +"journal-title": "World Archaeol." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib35", +"series-title": "Experiments in the Collection and Analysis of Archaeological Survey Data: the East Hampshire Survey", +"author": "Shennan", +"year": "1985" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib36", +"doi-asserted-by": "crossref", +"first-page": "1066", +"DOI": "10.1016/j.culher.2016.06.006", +"article-title": "Drones over Mediterranean landscapes. The potential of small UAV's (drones) for site detection and heritage management in archaeological survey projects: a case study from Le Pianelle in the Tappino Valley, Molise (Italy)", +"volume": "22", +"author": "Stek", +"year": "2016", +"journal-title": "J. Cult. Herit." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib37", +"series-title": "Side-by-Side Survey. Comparative Regional Studies in the Mediterranean World", +"first-page": "65", +"article-title": "Side-by-side and back to front: exploring intra-regional latitudinal and longitudinal comparability in survey data. Three case studies from Metaponto, southern Italy", +"author": "Thomson", +"year": "2004" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib38", +"series-title": "Digital Discovery. Exploring New Frontiers in Human Heritage. Computer Applications and Quantitative Methods in Archaeology", +"article-title": "Computer vision and machine learning for archaeology", +"author": "van der Maaten", +"year": "2007" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib39", +"doi-asserted-by": "crossref", +"first-page": "1114", +"DOI": "10.1111/j.1475-4754.2012.00667.x", +"article-title": "Computer vision-based orthophoto mapping of complex archaeological sites: the ancient quarry of Pitaranha (Portugal-Spain)", +"volume": "54", +"author": "Verhoeven", +"year": "2012", +"journal-title": "Archaeometry" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib40", +"series-title": "A Guide for Salvage Archeology", +"author": "Wendorf", +"year": "1962" +} +], +"container-title": [ +"Journal of Archaeological Science" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "https://api.elsevier.com/content/article/PII:S0305440319301001?httpAccept=text/xml", +"content-type": "text/xml", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "https://api.elsevier.com/content/article/PII:S0305440319301001?httpAccept=text/plain", +"content-type": "text/plain", +"content-version": "vor", +"intended-application": "text-mining" +} +], +"deposited": { +"date-parts": [ +[ +2019, +11, +25 +] +], +"date-time": "2019-11-25T06:46:34Z", +"timestamp": 1574664394000 +}, +"score": 1, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"references-count": 39, +"alternative-id": [ +"S0305440319301001" +], +"URL": "http://dx.doi.org/10.1016/j.jas.2019.105013", +"relation": { + +}, +"ISSN": [ +"0305-4403" +], +"issn-type": [ +{ +"value": "0305-4403", +"type": "print" +} +], +"subject": [ +"Archaeology", +"Archaeology" +], +"published": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"assertion": [ +{ +"value": "Elsevier", +"name": "publisher", +"label": "This article is maintained by" +}, +{ +"value": "A brave new world for archaeological survey: Automated machine learning-based potsherd detection using high-resolution drone imagery", +"name": "articletitle", +"label": "Article Title" +}, +{ +"value": "Journal of Archaeological Science", +"name": "journaltitle", +"label": "Journal Title" +}, +{ +"value": "https://doi.org/10.1016/j.jas.2019.105013", +"name": "articlelink", +"label": "CrossRef DOI link to publisher maintained version" +}, +{ +"value": "article", +"name": "content_type", +"label": "Content Type" +}, +{ +"value": "© 2019 The Authors. Published by Elsevier Ltd.", +"name": "copyright", +"label": "Copyright" +} +], +"article-number": "105013" +} From d50057b2d92ab99407461d37490bff19b6608346 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:59:37 +0100 Subject: [PATCH 08/15] [DOIBoost Mapping] changed the way to create the url for the instance: we use the crooref guidelines https://doi.org/doi --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- .../java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 4c06d283a6..1b1c850ba3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -206,7 +206,7 @@ case object Crossref2Oaf { else { instance.setDateofacceptance(asField(createdDate.getValue)) } - val s: List[String] = List((json \ "URL").extract[String]) + val s: List[String] = List("https://doi.org/" + doi) // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct // if (links.nonEmpty) { // instance.setUrl(links.asJava) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index f6d5e124ea..5ef92cfa47 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -629,7 +629,7 @@ class CrossrefMappingTest { assertEquals(1, item.getInstance().size()) assertEquals(1, item.getInstance().get(0).getUrl().size()) - assertEquals("http://dx.doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + assertEquals("https://doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) //println(mapper.writeValueAsString(item)) } From de63d29b6f5b6cdc0a45df94f5ade5f9c4c28711 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Nov 2021 16:16:40 +0100 Subject: [PATCH 09/15] [DOIBoost Mapping] Fix to avoid to produce results with null as identifier (probably due to the filtering function in the factory for the creation of the id) --- .../dnetlib/doiboost/mag/SparkProcessMAG.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index c011cbd208..fa3be973de 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -164,12 +164,18 @@ object SparkProcessMAG { .write.mode(SaveMode.Overwrite) .save(s"$workingPath/mag_publication") + spark.read.load(s"$workingPath/mag_publication").as[Publication] + .filter(p => p.getId == null) + .groupByKey(p => p.getId) + .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) + .map(_._2) + .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] - .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) - .map(_._2) - - spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") +// val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] +// .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) +// .map(_._2) +// +// spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") } } From df7ee77c7afd7af06f90ebaac447decc665f2428 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Nov 2021 16:24:07 +0100 Subject: [PATCH 10/15] [DOIBoost Mapping] removed not needed comments --- .../doiboost/mag/SparkProcessMAG.scala | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index fa3be973de..016279787d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -114,25 +114,6 @@ object SparkProcessMAG { .save(s"$workingPath/merge_step_3") - //no more needed to add the instance to mag records -// magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] -// .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] -// -// val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] -// -// -// -// logger.info("Phase 5) enrich publication with URL and Instances") -// magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") -// .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } -// .write.mode(SaveMode.Overwrite) -// .save(s"$workingPath/merge_step_3") - - -// logger.info("Phase 6) Enrich Publication with description") -// val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] -// pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") - val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] @@ -171,11 +152,7 @@ object SparkProcessMAG { .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") -// val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] -// .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) -// .map(_._2) -// -// spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") + } } From 6477a40670d18d06b8992a8983f4f00c62220e4b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 9 Nov 2021 11:27:12 +0100 Subject: [PATCH 11/15] implement filter of openCitation --- .../eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 141b7b0736..4b82fe6455 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -59,7 +59,12 @@ object SparkConvertRDDtoDataset { log.info("Converting Relation") - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])).filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") + + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") + .map(s => mapper.readValue(s, classOf[Relation])) + .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") From 9cb195314fa15ff689cd12d66c3b299f006e7694 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 11 Nov 2021 10:17:40 +0100 Subject: [PATCH 12/15] implemented and tested resolution of entities --- .../resolution/SparkResolveEntities.scala | 96 +++++++++ .../resolution/SparkResolveRelation.scala | 2 +- .../resolution/resolve_entities_params.json | 6 + .../resolution/ResolveEntitiesTest.scala | 190 ++++++++++++++++++ .../dnetlib/dhp/oa/graph/resolution/dataset | 3 + .../oa/graph/resolution/otherresearchproduct | 2 + .../dhp/oa/graph/resolution/publication | 3 + .../dnetlib/dhp/oa/graph/resolution/software | 1 + .../dnetlib/dhp/oa/graph/resolution/updates | 4 + 9 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala new file mode 100644 index 0000000000..60df58e45f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -0,0 +1,96 @@ +package eu.dnetlib.dhp.oa.graph.resolution + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +object SparkResolveEntities { + + val mapper = new ObjectMapper() + val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphBasePath = parser.get("graphBasePath") + log.info(s"graphBasePath -> $graphBasePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + val unresolvedPath = parser.get("unresolvedPath") + log.info(s"unresolvedPath -> $unresolvedPath") + + + resolveEntities(spark, workingPath, unresolvedPath) + + + generateResolvedEntities(spark, workingPath, graphBasePath) + + } + + +def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val up: Dataset[(String, Result)] = spark.read.text(unresolvedPath).as[String].map(s => mapper.readValue(s, classOf[Result])).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + + rPid.joinWith(up, rPid("_2").equalTo(up("_1")), "inner").map { + r => + val result = r._2._2 + val dnetId = r._1._1 + result.setId(dnetId) + result + }.write.mode(SaveMode.Overwrite).save(s"$workingPath/resolvedEntities") + } + + + def deserializeObject(input:String, entity:EntityType ) :Result = { + + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software=> mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) + } + } + + def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + entities.foreach { + e => + + spark.read.text(s"$graphBasePath/$e").as[String] + .map(s => deserializeObject(s, e)) + .union(re) + .groupByKey(_.getId) + .reduceGroups { + (x, y) => + x.mergeFrom(y) + x + }.map(_._2) + .filter(r => r.getClass.getSimpleName.toLowerCase != "result") + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingPath/resolvedGraph/$e") + } + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index db93bc43fa..9e44e017ce 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -128,7 +128,7 @@ object SparkResolveRelation { source != null } - private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { + def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { import spark.implicits._ val d: RDD[(String, String)] = spark.sparkContext.textFile(s"$graphPath/*") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json new file mode 100644 index 0000000000..f38cc1291f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"u", "paramLongName":"unresolvedPath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala new file mode 100644 index 0000000000..9a142d3c0e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -0,0 +1,190 @@ +package eu.dnetlib.dhp.oa.graph.resolution + + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Result, StructuredProperty} +import org.apache.commons.io.FileUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.TestInstance.Lifecycle +import org.junit.jupiter.api.{AfterAll, BeforeAll, Test, TestInstance} + +import java.nio.file.{Files, Path} +import scala.collection.JavaConverters._ +import scala.io.Source + +@TestInstance(Lifecycle.PER_CLASS) +class ResolveEntitiesTest extends Serializable { + + var workingDir:Path = null + + val FAKE_TITLE = "FAKETITLE" + val FAKE_SUBJECT = "FAKESUBJECT" + + var sparkSession:Option[SparkSession] = None + + + @BeforeAll + def setUp() :Unit = { + workingDir = Files.createTempDirectory(getClass.getSimpleName) + + val conf = new SparkConf() + sparkSession = Some(SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master("local[*]").getOrCreate()) + populateDatasets(sparkSession.get) + generateUpdates(sparkSession.get) + + } + + + @AfterAll + def tearDown():Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + sparkSession.get.stop() + + + } + + + + + def generateUpdates(spark:SparkSession):Unit = { + val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString + + + val pids:List[String] = template.lines.map{id => + val r = new Result + r.setId(id.toLowerCase.trim) + r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + r.setTitle(List(OafMapperUtils.structuredProperty(FAKE_TITLE, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + r + }.map{r => + val mapper = new ObjectMapper() + + mapper.writeValueAsString(r)}.toList + + + val sc =spark.sparkContext + + println(sc.parallelize(pids).count()) + + spark.createDataset(sc.parallelize(pids))(Encoders.STRING).write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingDir/updates") + + + + + + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.bean(classOf[Result]) + val ds = spark.read.text(s"$workingDir/updates").as[String].map{s => val mapper = new ObjectMapper() + mapper.readValue(s, classOf[Result])}.collect() + + + + + assertEquals(4, ds.length) + ds.foreach{r => assertNotNull(r.getSubject)} + ds.foreach{r => assertEquals(1,r.getSubject.size())} + ds.foreach{r => assertNotNull(r.getTitle)} + ds.foreach{r => assertEquals(1,r.getTitle.size())} + + + + ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_TITLE,t)) + ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_SUBJECT,t)) + + println("generated Updates") + } + + + def populateDatasets(spark:SparkSession):Unit = { + import spark.implicits._ + val entities =SparkResolveEntities.entities + + entities.foreach{ + e => + val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString + spark.createDataset(spark.sparkContext.parallelize(template.lines.toList)).as[String].write.option("compression", "gzip").text(s"$workingDir/graph/$e") + println(s"Created Dataset $e") + } + SparkResolveRelation.extractPidResolvedTableFromJsonRDD(spark, s"$workingDir/graph", s"$workingDir/work") + + } + + + @Test + def testResolution():Unit = { + val spark:SparkSession = sparkSession.get + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) + + val ds = spark.read.load(s"$workingDir/work/resolvedEntities").as[Result] + + assertEquals(3, ds.count()) + + ds.collect().foreach{ + r => + assertTrue(r.getId.startsWith("50")) + } + } + + + + + private def structuredPContainsValue(l:java.util.List[StructuredProperty], exptectedValue:String):Boolean = { + l.asScala.exists(p =>p.getValue!= null && p.getValue.equalsIgnoreCase(exptectedValue)) + } + + @Test + def testUpdate():Unit = { + val spark:SparkSession = sparkSession.get + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val m = new ObjectMapper() + SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) + SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph" ) + + + + + val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + assertEquals(0, t) + assertEquals(2, td) + assertEquals(1, ts) + assertEquals(0, to) + + } + + + + + + + + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset new file mode 100644 index 0000000000..05c875148c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct new file mode 100644 index 0000000000..f3693848de --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct @@ -0,0 +1,2 @@ +{"author":[{"affiliation":[],"fullname":"Martinec, Nikola","name":"Nikola","pid":[],"rank":1,"surname":"Martinec"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"contactgroup":[],"contactperson":[],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-25T05:50:25+0000","dateoftransformation":"2021-10-26T05:23:26.742Z","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Simulacije mekih tijela neizostavan su dio realističnog prikaza stvarnog svijeta u različitim virtualnim okruženjima. U ovom radu obrađene su dvije metode simulacije mekih tijela – metoda s oprugama i metoda usklađivanja oblika. Napravljene su programske implementacije obiju metoda te je prikazana usporedba u obliku brzine izvođenja i vizualne kvalitete simulacija. Metoda uspoređivanja oblika napravljena je po uzoru na rad Meshless Deformations Based on Shape Matching [2] dok su kod metode s oprugama, ponajviše za algoritme raspoređivanja opruga, korištena autorova rješenja. U radu su opisani karakteristični dijelovi potrebni za implementaciju kao i problemi koji su se javljali tijekom izrade ovoga rada."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::14cc6e339ce130a10dbc5d9269dee6a3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"master thesis","classname":"master thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/636614"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433838126,"originalId":["636614","50|57a035e5b1ae::14cc6e339ce130a10dbc5d9269dee6a3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2013-01-01"}],"resourcetype":{"classid":"master thesis","classname":"master thesis","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"meka tijela; simulacija; usklađivanje oblika; opruge i mase"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Simulacija elastičnih objekata"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Soft bodies simulation"}],"tool":[]} +{"author":[{"affiliation":[],"fullname":"Sobočanec, Sandra","name":"Sandra","pid":[],"rank":1,"surname":"Sobočanec"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"contactgroup":[],"contactperson":[],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-10-25T05:50:25+0000","dateoftransformation":"2021-10-26T05:22:35.748Z","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Propolis je tijekom stoljeća, a u posljednje vrijeme i u suvremenoj medicinskoj praksi ispitivan i korišten kao protuvirusno, protubakterijsko i protugljivično sredstvo. Biološki učinci propolisa vezani su uz antibiotska, protuupalna i antioksidacijska svojstva. Jedan od sastojaka propolisa su flavonoidi, čija je karakteristika visoki antioksidacijski potencijal zbog kojeg im se pripisuje sposobnost prevencije različitih bolesti kao što su različite vrste tumora, osteoporoza, kardiovaskularne i neurodegenerativne bolesti. U našim ispitivanjima koristili smo nativni propolis u dvije doze (100 mg kg-1, 300 mg kg-1), kojem smo istražili sastav, udio i vrstu flavonoida odgovornih za njegov oksidacijski/antioksidacijski učinak. Krizin je bio prisutan u najvećoj koncentraciji, dok je izoramnetin bio prisutan u najmanjoj. Antioksidacijski potencijal nativnog propolisa u in vitro uvjetima je bio relativno nizak. Međutim, propolis značajno povisuje antioksidacijski učinak in vivo uvjetima u jetri, plućima i mozgu miševa. Učinak nativnog propolisa pokazao ovisio je o dozi i to tako da je u manjoj dozi djelovao antioksidacijski (zaštitno), a u većoj dozi prooksidacijski (štetno). Oksidacijski status nakon obradaa nativnim propolisom nije pokazao tkivnu, ali je pokazao doznu ovisnost. Antioksidacijski status pokazao je tkivnu i doznu ovisnost ; u mozgu nije pokazao promjenu, dok je najveća promjena zabilježena u plućima i u manjoj mjeri u jetri. Promjena aktivnosti antioksidacijskih enzima u jetri nije korelirala s količinom njihove mRNA, dok je u plućima promjena u aktivnosti istih uglavnom bila proporcionalna promjeni u mRNA. Nije zapažena značajna promjena u ekpresiji proteina za sve ispitivane antioksidacijske enzime, niti obzirom na dozu, niti obzirom na ispitivano tkivo. Analizom DNA čipova utvrđeno je da propolis u dozi od 100 mg kg-1 u jetri ima zaštitni učinak, dok je u dozi od 300 mg kg-1 taj učinak prooksidacijski. U plućima je propolis u obje doze imao zaštitni učinak, dok u mozgu nije imao učinka na ispitivane gene. U hiperbaričkim uvjetima propolis je smanjio oksidacijski stres u sva tri ispitivana tkiva u dozi od 100 mg kg-1. U plućima je taj učinak propolis ostvario povećanjem aktivnosti antioksidacijskih enzima (MnSOD, KAT). Antioksidacijski učinak propolisa porijeklom iz Hrvatske do sada nije istraživan u in vivo sistemima. Isto tako po prvi puta se ispituje nativni propolis (tehnologijska inovacija) za razliku od do sada ispitivanih vodenih ili alkoholnih ekstrakata. Rezultati ove studije također pokazuju da sposobnost zaštitnog odgovora u plućima na oksidacijski stres koji postoji u novorođenih, ali se gubi u odraslih jedinki, može biti obnovljena uporabom nativnog propolisa tijekom hiperoksije. Ta spoznaja može pomoći u zaštiti od hiperoksije u populaciji odraslih tijekom terapije kisikom."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::15786c1f0f2930ea867a234fbcb8ef08","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"doctoral thesis","classname":"doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/286858"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433372341,"originalId":["286858","50|57a035e5b1ae::15786c1f0f2930ea867a234fbcb8ef08"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2006-01-01"}],"resourcetype":{"classid":"doctoral thesis","classname":"doctoral thesis","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"propolis; flavonoidi; miševi; oksidacijski/antioksidacijski status; hiperoksija; normobarički uvjeti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Utjecaj propolisa na oksidacisjki/antioksidacijski status u CBA miša"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Propolis effect on oxidative/antioxidative status in CBA mice"}],"tool":[]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication new file mode 100644 index 0000000000..562a54760e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication @@ -0,0 +1,3 @@ +{"author":[{"fullname":"Makkonen, Lasse","name":"Lasse","pid":[],"rank":1,"surname":"Makkonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Watson, Rick"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"dateofcollection":"2021-10-04T12:42:57.502Z","dateoftransformation":"2021-10-04T15:32:51.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433424020,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-10-26T12:07:44Z","harvestDate":"2021-10-04T12:42:57.502Z","identifier":"oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Irish Wind Energy Association"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Makkonen , L 1997 , Calculation of ice accretion on wind turbine blades . in R Watson (ed.) , EWEC '97: European Wind Energy Conference : Proceedings of the international conference . Irish Wind Energy Association , Slane , European Wind Energy Conference EWEC '97 , Dublin , Ireland , 6/10/97 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Calculation of ice accretion on wind turbine blades"}]} +{"author":[{"fullname":"Peltola, Juho","name":"Juho","pid":[],"rank":1,"surname":"Peltola"},{"fullname":"Karvonen, Lassi","name":"Lassi","pid":[],"rank":2,"surname":"Karvonen"},{"fullname":"Elfvengren, J","name":"J.","pid":[],"rank":3,"surname":"Elfvengren"},{"fullname":"Kolehmainen, J","name":"J.","pid":[],"rank":4,"surname":"Kolehmainen"},{"fullname":"Kallio, Sirpa","name":"Sirpa","pid":[],"rank":5,"surname":"Kallio"},{"fullname":"Pallares, D","name":"D.","pid":[],"rank":6,"surname":"Pallares"},{"fullname":"Johnsson, F","name":"F.","pid":[],"rank":7,"surname":"Johnsson"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-10-04T12:31:44.326Z","dateoftransformation":"2021-10-04T17:18:21.611Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This work applies optical probe and particle image velocimetry (PIV) measurements for determining the local characteristics of the solids flow in a CFB riser and discusses benefits and limitations of the two methods. Measurements were carried out in an 8.5 m tall CFB riser with a cross-section of 0.7 m x 0.12 m. The optical probe was used to measure both solids volume fraction and velocity whereas the PIV was limited to solids velocity measurements. In addition, the vertical pressure profile and solids circulation rate were measured by means of pressure transducers. The work demonstrates the challenge of applying detailed solids flow measurements in CFB reactors. Both methods applied are associated with limitations and inaccuracies and it is concluded that a combination of both local measurement methods and the global pressure measurements will improve the interpretation of the solids flow field in CFB risers

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2a13bce11d3eb09889706369b114a476","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/c7012d64-5947-4f40-b62f-6c3f232af0b0"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433455504,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:04:31Z","harvestDate":"2021-10-04T12:31:44.326Z","identifier":"oai:cris.vtt.fi:publications/c7012d64-5947-4f40-b62f-6c3f232af0b0","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/c7012d64-5947-4f40-b62f-6c3f232af0b0","50|355e65625b88::2a13bce11d3eb09889706369b114a476"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institute of Process Engineering"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Peltola , J , Karvonen , L , Elfvengren , J , Kolehmainen , J , Kallio , S , Pallares , D & Johnsson , F 2014 , Measurement of solids velocity and concentration in a large cold CFB model . in CFB-11: Proceedings of the 11th International Conference on Fluidized Bed Technology . Institute of Process Engineering , Beijing, China , pp. 589-594 , 11th International Conference on Fluidized Bed Technology, CFB 2014 , Beijing , China , 14/05/14 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Measurement of solids velocity and concentration in a large cold CFB model"}]} +{"author":[{"fullname":"Marszalec, Janusz","name":"Janusz","pid":[],"rank":1,"surname":"Marszalec"},{"fullname":"Järviluoma, Markku","name":"Markku","pid":[],"rank":2,"surname":"Järviluoma"},{"fullname":"Heikkilä, Tapio","name":"Tapio","pid":[],"rank":3,"surname":"Heikkilä"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2021-10-04T12:42:38.338Z","dateoftransformation":"2021-10-05T01:02:19.804Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::b58939ea13408430070558e913a80bdf","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/12.135094"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a5d16720-e901-4605-8c46-2d8d920af7e2"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433591966,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T04:23:59Z","harvestDate":"2021-10-04T12:42:38.338Z","identifier":"oai:cris.vtt.fi:publications/a5d16720-e901-4605-8c46-2d8d920af7e2","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::b58939ea13408430070558e913a80bdf","oai:cris.vtt.fi:publications/a5d16720-e901-4605-8c46-2d8d920af7e2"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"International Society for Optics and Photonics SPIE"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Marszalec , J , Järviluoma , M & Heikkilä , T 1991 , Planning of an active range sensor structure for pose estimation of 3-D regular objects . in Intelligent Robots and Computer Vision X: Neural, Biological, and 3-D Methods . International Society for Optics and Photonics SPIE , Proceedings of SPIE , vol. 1608 , Intelligent Robots and Computer Vision X , Boston , Massachusetts , United States , 10/11/91 . https://doi.org/10.1117/12.135094"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Planning of an active range sensor structure for pose estimation of 3-D regular objects"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software new file mode 100644 index 0000000000..da4dd7e9fb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software @@ -0,0 +1 @@ +{"author":[{"affiliation":[],"fullname":"Franz, Max","name":"Max","pid":[],"rank":1,"surname":"Franz"},{"affiliation":[],"fullname":"Cheung, Manfred","name":"Manfred","pid":[],"rank":2,"surname":"Cheung"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@mskcc"}],"fullname":"Sumer, Onur","name":"Onur","pid":[],"rank":3,"surname":"Sumer"},{"affiliation":[],"fullname":"Huck, Gerardo","name":"Gerardo","pid":[],"rank":4,"surname":"Huck"},{"affiliation":[],"fullname":"Fong, Dylan","name":"Dylan","pid":[],"rank":5,"surname":"Fong"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Northeastern University"}],"fullname":"Mullen, Tony","name":"Tony","pid":[],"rank":6,"surname":"Mullen"},{"affiliation":[],"fullname":"Ayhun","name":"","pid":[],"rank":7,"surname":""},{"affiliation":[],"fullname":"Chadkin, Bogdan","name":"Bogdan","pid":[],"rank":8,"surname":"Chadkin"},{"affiliation":[],"fullname":"Metincansiper","name":"","pid":[],"rank":9,"surname":""},{"affiliation":[],"fullname":"Stahl, Joseph","name":"Joseph","pid":[],"rank":10,"surname":"Stahl"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Trisotech"}],"fullname":"Gauthier, Mélanie","name":"Mélanie","pid":[],"rank":11,"surname":"Gauthier"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@payoneer"}],"fullname":"Sherer, Eli","name":"Eli","pid":[],"rank":12,"surname":"Sherer"},{"affiliation":[],"fullname":"Bumbu","name":"","pid":[],"rank":13,"surname":""},{"affiliation":[],"fullname":"Sidlovsky, Yaroslav","name":"Yaroslav","pid":[],"rank":14,"surname":"Sidlovsky"},{"affiliation":[],"fullname":"Trott, Rich","name":"Rich","pid":[],"rank":15,"surname":"Trott"},{"affiliation":[],"fullname":"Beynon, Mike","name":"Mike","pid":[],"rank":16,"surname":"Beynon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"University of Toronto"}],"fullname":"Lopes, Christian","name":"Christian","pid":[],"rank":17,"surname":"Lopes"},{"affiliation":[],"fullname":"Li, Alexander","name":"Alexander","pid":[],"rank":18,"surname":"Li"},{"affiliation":[],"fullname":"Cheng, Rui","name":"Rui","pid":[],"rank":19,"surname":"Cheng"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Bio + CS ⇝ ❤"}],"fullname":"Wilzbach, Sebastian","name":"Sebastian","pid":[],"rank":20,"surname":"Wilzbach"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@atlassian"}],"fullname":"Dias, Mike","name":"Mike","pid":[],"rank":21,"surname":"Dias"},{"affiliation":[],"fullname":"Janit Mehta","name":"","pid":[],"rank":22,"surname":""},{"affiliation":[],"fullname":"Meira, Gui","name":"Gui","pid":[],"rank":23,"surname":"Meira"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"InfoTrack"}],"fullname":"Ryding, Daniel","name":"Daniel","pid":[],"rank":24,"surname":"Ryding"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Clarivate Analytics"}],"fullname":"Mosca, Roberto","name":"Roberto","pid":[],"rank":25,"surname":"Mosca"},{"affiliation":[],"fullname":"Jalil, F.","name":"F.","pid":[],"rank":26,"surname":"Jalil"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@testdouble"}],"fullname":"Greenwood, Josh","name":"Josh","pid":[],"rank":27,"surname":"Greenwood"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Adpearance"}],"fullname":"Buchmann, Eric","name":"Eric","pid":[],"rank":28,"surname":"Buchmann"},{"affiliation":[],"fullname":", Dominic","name":"","pid":[],"rank":29,"surname":""},{"affiliation":[],"fullname":"Osborn, Connor","name":"Connor","pid":[],"rank":30,"surname":"Osborn"}],"bestaccessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"dateofcollection":"2018-10-28T00:39:04.337Z","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Graph theory (network) library for visualisation and analysis"}],"documentationUrl":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________::0682c24009ab5c1624d1c032783feff1","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694","https://zenodo.org/record/1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"value":"2018-10-29"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0000","classname":"Unknown","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635435250840,"license":[],"measures":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T11:21:15.546Z","identifier":"10.5281/zenodo.1473694","metadataNamespace":""}},"originalId":["50|datacite____::0682c24009ab5c1624d1c032783feff1","10.5281/zenodo.1473694","oai:zenodo.org:1473694","50|od______2659::098cabdf6ac7d52b3d969c89a1ae95ba","50|r37b0ad08687::098cabdf6ac7d52b3d969c89a1ae95ba"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"programmingLanguage":{"classid":"","classname":"","schemeid":"","schemename":""},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-10-29"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cytoscape/Cytoscape.Js V3.2.19"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates new file mode 100644 index 0000000000..04ad7a79bd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates @@ -0,0 +1,4 @@ +unresolved::10.17026/dans-x3z-fsq5::doi +unresolved::10.17026/dans-xsw-qtnx::doi +unresolved::10.5281/zenodo.1473694::doi +unresolved::10.17632/fake::doi From 2ca0a436ad60f8110c09379b7c365cdfcfcf0480 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 11 Nov 2021 10:25:42 +0100 Subject: [PATCH 13/15] added SparkResolveEntities node to the oozie wf --- .../resolution/SparkResolveEntities.scala | 17 ++++++++-- .../graph/resolution/oozie_app/workflow.xml | 32 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 60df58e45f..afd195ed04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,9 +2,11 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} @@ -34,13 +36,22 @@ object SparkResolveEntities { val unresolvedPath = parser.get("unresolvedPath") log.info(s"unresolvedPath -> $unresolvedPath") + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.mkdirs(new Path(workingPath)) resolveEntities(spark, workingPath, unresolvedPath) - - generateResolvedEntities(spark, workingPath, graphBasePath) - } + // TO BE conservative we keep the original entities in the working dir + // and save the resolved entities on the graphBasePath + //In future these lines of code should be removed + entities.foreach { + e => + fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old")) + fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) + } + +} def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 52f0e6e9d0..4773fc87c6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -4,6 +4,10 @@ graphBasePath the path of the graph + + unresolvedPath + the path of the unresolved Entities + @@ -36,5 +40,33 @@ + + + + yarn + cluster + Resolve Relations in raw graph + eu.dnetlib.dhp.oa.graph.resolution.SparkResolveEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=10000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --graphBasePath${graphBasePath} + --unresolvedPath${unresolvedPath} + --workingPath${workingDir} + + + + + + \ No newline at end of file From a7763d249205f71a6ae274eb3e4b7c3f53619aaf Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 12 Nov 2021 09:56:45 +0100 Subject: [PATCH 14/15] removed alternate identifier in resolutionMap --- .../resolution/SparkResolveRelation.scala | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index 9e44e017ce..cd517dd5e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -96,6 +96,21 @@ object SparkResolveRelation { .text(s"$graphBasePath/relation") } + def extractInstanceCF(input: String): List[(String, String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + val result: List[(String, String)] = for { + JObject(iObj) <- json \ "instance" + JField("collectedfrom", JObject(cf)) <- iObj + JField("instancetype", JObject(instancetype)) <- iObj + JField("value", JString(collectedFrom)) <- cf + JField("classname", JString(classname)) <- instancetype + } yield (classname, collectedFrom) + + result + + } + def extractPidsFromRecord(input: String): (String, List[(String, String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -108,14 +123,7 @@ object SparkResolveRelation { JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) - val alternateIds: List[(String, String)] = for { - JObject(pids) <- json \\ "alternateIdentifier" - JField("value", JString(pidValue)) <- pids - JField("qualifier", JObject(qualifier)) <- pids - JField("classid", JString(pidType)) <- qualifier - } yield (pidValue, pidType) - - (id, result ::: alternateIds) + (id, result) } From 1f2a3d1af073d79b56499db89956b4fd8fae7096 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 12 Nov 2021 10:15:11 +0100 Subject: [PATCH 15/15] depending on dhp-schemas:2.8.22 (release) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 02bc5d8d4e..274192c676 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.21] + [2.8.22] [4.0.3] [6.0.5] [3.1.6]