From 441701c85c4e41c50bbdd49334234a28ff554c01 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:36:30 +0200 Subject: [PATCH 001/162] DoiBoost AccessRigh #4362 - If multiple licenses are available, take the one applied to 'vor' --- .../doiboost/crossref/Crossref2Oaf.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index fb96717d9f..3f2f63fe67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -168,12 +168,22 @@ case object Crossref2Oaf { // Mapping instance val instance = new Instance() val license = for { - JString(lic) <- json \ "license" \ "URL" - } yield asField(lic) - val l = license.filter(d => StringUtils.isNotBlank(d.getValue)) - if (l.nonEmpty) - instance.setLicense(l.head) - + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license + JField("content-version", JString(content_version)) <- license + } yield (asField(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + if (l.nonEmpty){ + if (l exists (d => d._2.equals("vor"))){ + for(d <- l){ + if (d._2.equals("vor")){ + instance.setLicense(d._1) + } + } + } + else{ + instance.setLicense(l.head._1)} + } // Ticket #6281 added pid to Instance instance.setPid(result.getPid) From 981b1018f6459b73205e0c2a90fbcf3fc72c52e8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:43:00 +0200 Subject: [PATCH 002/162] DoiBoost AccessRigh #4362 - decide access right according to licence. Default access right is Unknown --- .../doiboost/DoiBoostMappingUtil.scala | 65 ++++++++++++++++++- .../doiboost/crossref/Crossref2Oaf.scala | 4 +- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index c0939fec12..2558e1c677 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -1,12 +1,16 @@ package eu.dnetlib.doiboost +import java.time.LocalDate +import java.time.format.DateTimeFormatter + import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -118,6 +122,51 @@ object DoiBoostMappingUtil { } + def decideAccessRight(lic : Field[String], date:String) : AccessRight = { + if(lic == null){ + //Default value Unknown + return getUnknownQualifier() + } + val license : String = lic.getValue + //CC licenses + if(license.startsWith("cc") || + license.startsWith("http://creativecommons.org/licenses") || + license.startsWith("https://creativecommons.org/licenses") || + + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")){ + + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + + //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) + if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ + val now = java.time.LocalDate.now + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + val pub_date = LocalDate.parse(date, formatter) + + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + } + + return getClosedAccessQualifier() + + } + def getOpenAccessQualifier():AccessRight = { @@ -129,6 +178,20 @@ object DoiBoostMappingUtil { } + def getUnknownQualifier():AccessRight = { + OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + + def getEmbargoedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("EMBARGO","Embargo",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def getClosedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("CLOSED","Closed Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def extractInstance(r:Result):Option[Instance] = { r.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 3f2f63fe67..25f0ff381f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,7 +4,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats @@ -195,7 +195,7 @@ case object Crossref2Oaf { OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) } - instance.setAccessright(getRestrictedQualifier()) + instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)) instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) From 6222adf17609ffe956be0bf6df2976363f182d3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:46:31 +0200 Subject: [PATCH 003/162] DoiBoost AccessRigh #4362 - added resources and test for crossref mapping (licence part included) --- .../crossref/CrossrefMappingTest.scala | 97 ++ .../crossref/publication_license_embargo.json | 1537 +++++++++++++++++ .../publication_license_embargo_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_vor.json | 1537 +++++++++++++++++ 5 files changed, 6245 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 0fa34d88e9..63555bcbd0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -492,6 +492,103 @@ class CrossrefMappingTest { } + @Test + def testLicenseVorClosed() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + + + + + } + + @Test + def testLicenseOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargoOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargo() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json new file mode 100644 index 0000000000..47ca55f34f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json new file mode 100644 index 0000000000..e667f3c7f7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2020, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json new file mode 100644 index 0000000000..225a36b1ff --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json new file mode 100644 index 0000000000..f2e91a23f7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://www.springer.com/vor", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From f4f7c6f9d34ac050e60bf35b42518fdb2082b280 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:52:52 +0200 Subject: [PATCH 004/162] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index cc758bcae3..0a5ba063fe 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -11,6 +11,7 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color @@ -23,6 +24,21 @@ case class OALocation(evidence:Option[String], host_type:Option[String], is_best object UnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(getClass) + + def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { + if(input.equalsIgnoreCase("close")) + return None + if(input.equalsIgnoreCase("green")) + return Some(OpenAccessRoute.green) + if(input.equalsIgnoreCase("bronze")) + return Some(OpenAccessRoute.bronze) + if(input.equalsIgnoreCase("hybrid")) + return Some(OpenAccessRoute.hybrid) + else + return Some(OpenAccessRoute.gold) + + } + def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = { if (is_oa) { if (location.host_type.isDefined) { @@ -65,7 +81,7 @@ object UnpayWallToOAF { val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) - val colour = get_color(is_oa, oaLocation, journal_is_oa) + val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null)) pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava) pub.setDataInfo(generateDataInfo()) From 09ad7b2a9e3bc6c64c12d345da56aaccaf3b1fa1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:58:05 +0200 Subject: [PATCH 005/162] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index 0a5ba063fe..c8324cde16 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -26,7 +26,7 @@ object UnpayWallToOAF { def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { - if(input.equalsIgnoreCase("close")) + if(input == null || input.equalsIgnoreCase("close")) return None if(input.equalsIgnoreCase("green")) return Some(OpenAccessRoute.green) From 34506df1b607241f72f699eafeb4472bc6f11d27 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 10:29:51 +0200 Subject: [PATCH 006/162] DoiBoost AccessRigh #4362 - if the journal is open, the OPEN access right is set to all instances and color is GOLD (overwrite if the color was already set in one of the previous steps) --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 2558e1c677..6a2f0e45f9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -213,8 +213,10 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) - if (item.openAccess) + if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) + i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + } val ar = getOpenAccessQualifier() publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } From acd60563309629d6d349f0aa0bdf92a7bf44b8a4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 12:47:10 +0200 Subject: [PATCH 007/162] added shell action to automatically download the new dump and put it in a specified hdfs location --- .../doiboost/preprocess/oozie_app/download.sh | 2 ++ .../preprocess/oozie_app/workflow.xml | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh new file mode 100644 index 0000000000..98984e2491 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!bin/bash +curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 03f7b7566c..d63e54b8d3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,12 +63,14 @@ + ${wf:conf('resumeFrom') eq 'Skip'} + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -76,6 +78,27 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + download.sh + + + + + + ${jobTracker} From c4b18e6ccb2946d9de0923b5d5bab5521907cae6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 15:01:25 +0200 Subject: [PATCH 008/162] changed the download.sh, added skip step to allow to not execute one phase and changed the workflow sequence of steps --- .../dhp/doiboost/preprocess/oozie_app/download.sh | 2 +- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 +++++++------ .../dhp/doiboost/process/oozie_app/workflow.xml | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index 98984e2491..dfb0db7082 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ #!bin/bash -curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index d63e54b8d3..a1b8804fad 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -70,7 +70,7 @@ ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -78,6 +78,7 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + ${jobTracker} @@ -105,7 +106,7 @@ ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} --outputPath${crossrefDumpPath}/files/ @@ -161,16 +162,16 @@ --targetPath${inputPathCrossref}/crossref_ds - + - - + + - + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f30..e75e1d8e13 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -75,6 +75,7 @@ + ${wf:conf('resumeFrom') eq 'Skip'} ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} ${wf:conf('resumeFrom') eq 'ProcessORCID'} From 199123b74b3a988a2cadf5bec843998e734d494b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 17:30:27 +0200 Subject: [PATCH 009/162] DoiBoost AccessRigh #4362 - Fixed issue on date formatting. Added test method and associated resource --- .../doiboost/DoiBoostMappingUtil.scala | 50 +- .../crossref/CrossrefMappingTest.scala | 21 + .../publication_license_embargo_datetime.json | 1538 +++++++++++++++++ 3 files changed, 1600 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 6a2f0e45f9..b0c6e009b8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -149,18 +149,50 @@ object DoiBoostMappingUtil { //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ val now = java.time.LocalDate.now - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - val pub_date = LocalDate.parse(date, formatter) + try{ + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch { + case e: Exception => { + try{ + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch{ + case ex: Exception => return getClosedAccessQualifier() + } + } - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() - oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) - return oaq - } - else{ - return getEmbargoedAccessQualifier() } + + //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + + + // val pub_date = LocalDate.parse(date, formatter) + +// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ +// val oaq : AccessRight = getOpenAccessQualifier() +// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) +// return oaq +// } +// else{ +// return getEmbargoedAccessQualifier() +// } } return getClosedAccessQualifier() diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 63555bcbd0..75fb3f787a 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -589,6 +589,27 @@ class CrossrefMappingTest { } + @Test + def testLicenseEmbargoDateTime() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json new file mode 100644 index 0000000000..c84e163508 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json @@ -0,0 +1,1538 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +], + "date-time": "2021-05-17T15:08:12Z" +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From 59530a14fb747b3c29a2fa36dcd2c5fa0e13eebe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:34:35 +0200 Subject: [PATCH 010/162] DoiBoost AccessRigh #4362 - set BestAccessRight with the ususal comparator --- .../doiboost/DoiBoostMappingUtil.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index b0c6e009b8..149d8ee5c1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -258,17 +258,18 @@ object DoiBoostMappingUtil { i.setHostedby(hb) }) - val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) - if (ar.nonEmpty) { - if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ - val ar = getOpenAccessQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - else { - val ar = getRestrictedQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - } + publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) +// val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) +// if (ar.nonEmpty) { +// if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// else { +// val ar = getRestrictedQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// } publication } From 662c396354d63732d95dba3055ceaccb0fe6a526 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:41:14 +0200 Subject: [PATCH 011/162] duplicate the number of partitions in ConvertCrossrefToOaf --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f30..34ca583440 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -99,7 +99,7 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From b420b11ed3c35182aaced4c9df378d2eb3ed0ec8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 18:16:23 +0200 Subject: [PATCH 012/162] duplicate the number of partitions in ProcessMag --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 34ca583440..499e0e0bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -124,7 +124,7 @@ --executor-memory=${sparkExecutorIntersectionMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 1a5b114906107df32c0e2df33dbd2b1e9c34f871 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:00:23 +0200 Subject: [PATCH 013/162] DoiBoost AccessRigh #4362 - refactoring --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 149d8ee5c1..686a2f1f15 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -249,8 +249,8 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } - val ar = getOpenAccessQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } else { hb = ModelConstants.UNKNOWN_REPOSITORY From 63553a76b3a9e491672c98177be1883a91035fd8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:01:48 +0200 Subject: [PATCH 014/162] added code to download gold issn list from unibi --- dhp-common/pom.xml | 10 ++ .../project/utils/CSVParser.java | 7 +- .../actionmanager/project/utils/ReadCSV.java | 16 +- .../project/utils/ReadExcel.java | 2 +- .../dhp/actionmanager/project/parameters.json | 5 + dhp-workflows/dhp-doiboost/pom.xml | 7 +- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 +++++ .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ++++++++++++++++++ .../download_unibi_issn_gold_parameters.json | 39 +++++ .../preprocess/oozie_app/workflow.xml | 14 ++ .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 +++++ .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 +++++++++ .../oa/provision/utils/XmlRecordFactory.java | 2 +- 13 files changed, 398 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf357..b32039e327 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -112,6 +112,16 @@ eu.dnetlib.dhp dhp-schemas + + + org.apache.commons + commons-csv + 1.8 + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b1..1d839bec53 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -16,10 +16,15 @@ import org.apache.commons.lang.reflect.FieldUtils; public class CSVParser { public List parse(String csvFile, String classForName) + throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { + return parse(csvFile, classForName, ';'); + } + + public List parse(String csvFile, String classForName, char delimiter) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { final CSVFormat format = CSVFormat.EXCEL .withHeader() - .withDelimiter(';') + .withDelimiter(delimiter) .withQuote('"') .withTrim(); List ret = new ArrayList<>(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d1..f9118350f5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -6,6 +6,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -29,6 +30,7 @@ public class ReadCSV implements Closeable { private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; + private final char delimiter; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -44,19 +46,23 @@ public class ReadCSV implements Closeable { final String hdfsPath = parser.get("hdfsPath"); final String hdfsNameNode = parser.get("hdfsNameNode"); final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + Optional delimiter = Optional.ofNullable(parser.get("delimiter")); + char del = ';'; + if (delimiter.isPresent()) + del = delimiter.get().charAt(0); + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { log.info("Getting CSV file..."); readCSV.execute(classForName); } + } public void execute(final String classForName) throws Exception { CSVParser csvParser = new CSVParser(); csvParser - .parse(csvFile, classForName) + .parse(csvFile, classForName, delimiter) .stream() .forEach(p -> write(p)); @@ -70,7 +76,8 @@ public class ReadCSV implements Closeable { public ReadCSV( final String hdfsPath, final String hdfsNameNode, - final String fileURL) + final String fileURL, + char delimiter) throws Exception { this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); @@ -85,6 +92,7 @@ public class ReadCSV implements Closeable { this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); + this.delimiter = delimiter; } protected void write(final Object p) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681cf..a13d9b791a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -31,7 +31,7 @@ public class ReadExcel implements Closeable { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - ReadCSV.class + ReadExcel.class .getResourceAsStream( "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index b6c9c94b94..9ccb70a9f9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -28,6 +28,11 @@ "paramLongName" : "sheetName", "paramDescription" : "the name of the sheet in case the file is excel", "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false } diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a29..ea8832754d 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,7 +82,12 @@ org.apache.commons commons-text - + + eu.dnetlib.dhp + dhp-aggregation + 1.2.4-SNAPSHOT + compile + diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java new file mode 100644 index 0000000000..00b6b184ba --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.doiboost; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { + + log.info("Getting CSV file..."); + readCSV.execute(classForName); + + } + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java new file mode 100644 index 0000000000..e5bd49adae --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java @@ -0,0 +1,151 @@ + +package eu.dnetlib.doiboost; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + private String ISSN; + private String ISSN_L; + private String ISSN_IN_DOAJ; + private String ISSN_IN_ROAD; + private String ISSN_IN_PMC; + private String ISSN_IN_OAPC; + private String ISSN_IN_WOS; + private String ISSN_IN_SCOPUS; + private String JOURNAL_IN_DOAJ; + private String JOURNAL_IN_ROAD; + private String JOURNAL_IN_PMC; + private String JOURNAL_IN_OAPC; + private String JOURNAL_IN_WOS; + private String JOURNAL_IN_SCOPUS; + private String TITLE; + private String TITLE_SOURCE; + + public String getISSN() { + return ISSN; + } + + public void setISSN(String ISSN) { + this.ISSN = ISSN; + } + + public String getISSN_L() { + return ISSN_L; + } + + public void setISSN_L(String ISSN_L) { + this.ISSN_L = ISSN_L; + } + + public String getISSN_IN_DOAJ() { + return ISSN_IN_DOAJ; + } + + public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { + this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; + } + + public String getISSN_IN_ROAD() { + return ISSN_IN_ROAD; + } + + public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { + this.ISSN_IN_ROAD = ISSN_IN_ROAD; + } + + public String getISSN_IN_PMC() { + return ISSN_IN_PMC; + } + + public void setISSN_IN_PMC(String ISSN_IN_PMC) { + this.ISSN_IN_PMC = ISSN_IN_PMC; + } + + public String getISSN_IN_OAPC() { + return ISSN_IN_OAPC; + } + + public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { + this.ISSN_IN_OAPC = ISSN_IN_OAPC; + } + + public String getISSN_IN_WOS() { + return ISSN_IN_WOS; + } + + public void setISSN_IN_WOS(String ISSN_IN_WOS) { + this.ISSN_IN_WOS = ISSN_IN_WOS; + } + + public String getISSN_IN_SCOPUS() { + return ISSN_IN_SCOPUS; + } + + public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { + this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; + } + + public String getJOURNAL_IN_DOAJ() { + return JOURNAL_IN_DOAJ; + } + + public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { + this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; + } + + public String getJOURNAL_IN_ROAD() { + return JOURNAL_IN_ROAD; + } + + public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { + this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; + } + + public String getJOURNAL_IN_PMC() { + return JOURNAL_IN_PMC; + } + + public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { + this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; + } + + public String getJOURNAL_IN_OAPC() { + return JOURNAL_IN_OAPC; + } + + public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { + this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; + } + + public String getJOURNAL_IN_WOS() { + return JOURNAL_IN_WOS; + } + + public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { + this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; + } + + public String getJOURNAL_IN_SCOPUS() { + return JOURNAL_IN_SCOPUS; + } + + public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { + this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; + } + + public String getTITLE() { + return TITLE; + } + + public void setTITLE(String TITLE) { + this.TITLE = TITLE; + } + + public String getTITLE_SOURCE() { + return TITLE_SOURCE; + } + + public void setTITLE_SOURCE(String TITLE_SOURCE) { + this.TITLE_SOURCE = TITLE_SOURCE; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json new file mode 100644 index 0000000000..9ccb70a9f9 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json @@ -0,0 +1,39 @@ +[ + + { + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + "paramRequired" : true + }, + { + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true + }, + { + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" : true +}, { + "paramName": "sn", + "paramLongName" : "sheetName", + "paramDescription" : "the name of the sheet in case the file is excel", + "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false +} + + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 52f958d4d9..4de1a21854 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,6 +63,7 @@ + ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -76,6 +77,19 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.doiboost.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiGoldIssnFileURL} + --hdfsPath${hdfsPath} + --classForNameeu.dnetlib.doiboost.UnibiGoldModel + + + + + + ${jobTracker} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java new file mode 100644 index 0000000000..6cfc90736e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.doiboost; + +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; + +public class GetCSVTest { + + @Test + public void readUnibiGoldTest() throws Exception { + + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); + + CSVParser csvParser = new CSVParser(); + + List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); + + Assertions.assertEquals(72, pl.size()); + +// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); +// +// pl.forEach(res -> { +// try { +// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); +// } catch (JsonProcessingException e) { +// e.printStackTrace(); +// } +// }); + + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv new file mode 100644 index 0000000000..6d4b6cdcbd --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ -0,0 +1,73 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" +"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" +"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" +"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" +"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" +"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ" +"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" +"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" +"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" +"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" +"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" +"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" +"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" +"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" +"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" +"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" +"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" +"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" +"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" +"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" +"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" +"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" +"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD" +"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" +"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" +"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" +"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" +"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" +"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" +"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" +"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" +"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" +"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" +"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" +"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" +"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" +"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d23718..2c82402900 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From f9fbb0f26193c8959c8b8cc527c734a60cfdd9f7 Mon Sep 17 00:00:00 2001 From: antleb Date: Sat, 24 Jul 2021 16:40:28 +0300 Subject: [PATCH 015/162] added indicators second sprint --- .../step16_7-createIndicatorsTables.sql | 197 +++++++++++++++++- 1 file changed, 196 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index 8998cb9fca..a2fc88a392 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -39,4 +39,199 @@ from publication p join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp -on p.id= tmp.id; \ No newline at end of file +on p.id= tmp.id; + +create table indi_project_pubs_count stored as parquet as +select pr.id id, count(p.id) total_pubs from project_results pr +join publication p on p.id=pr.result +group by pr.id + +create table indi_project_datasets_count stored as parquet as +select pr.id id, count(d.id) total_datasets from project_results pr +join dataset d on d.id=pr.result +group by pr.id + +create table indi_project_software_count stored as parquet as +select pr.id id, count(s.id) total_software from project_results pr +join software s on s.id=pr.result +group by pr.id + +create table indi_project_otherresearch_count stored as parquet as +select pr.id id, count(o.id) total_other from project_results pr +join otherresearchproduct o on o.id=pr.result +group by pr.id + +create table indi_pub_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM publication p + join result_organization ro on p.id=ro.id + join organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_dataset_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM dataset d + join result_organization ro on d.id=ro.id + join organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_software_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM software s + join result_organization ro on s.id=ro.id + join SOURCER.organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + + +create table indi_other_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM otherresearchproduct orp + join result_organization ro on orp.id=ro.id + join organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_pub_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join publication p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofpubs +from total + +create table indi_dataset_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join dataset p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofdataset +from total + +create table indi_software_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join software p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofsoftware +from total + +create table indi_other_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join otherresearchproduct p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofother +from total + +create table indi_other_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from otherresearchproduct_datasources pd +join datasource d on datasource=d.id +join otherresearchproduct p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +from total + +create table indi_software_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from software_datasources pd +join datasource d on datasource=d.id +join software p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +from total + +create table indi_dataset_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from dataset_datasources pd +join datasource d on datasource=d.id +join dataset p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +from total + +create table indi_pub_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from publication_datasources pd +join datasource d on datasource=d.id +join publication p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfPubs +from total + +create table indi_pub_has_cc_licence stored as parquet as +select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +create table indi_pub_has_cc_licence_url stored as parquet as +select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url +from publication p +left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host +from publication p +join publication_licenses as license on license.id = p.id +WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp +on p.id= tmp.id + + +create table indi_pub_has_abstract stored as parquet as +select distinct publication.id, coalesce(abstract, 1) has_abstract +from publication \ No newline at end of file From f3b9570354bd170511c56e9995ac6188601add56 Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 26 Jul 2021 13:00:16 +0300 Subject: [PATCH 016/162] properly invalidating metadata --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh index d5aa207d19..fb944f4ffb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh @@ -13,7 +13,7 @@ echo "Getting file from " $SCRIPT_PATH hdfs dfs -copyToLocal $SCRIPT_PATH echo "Creating indicators" -impala-shell -d ${TARGET} -q "invalidate metadata" +impala-shell -q "invalidate metadata" impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f - cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f - echo "Indicators created" \ No newline at end of file From ed185fd7ed479e385904eb4b8edc4fe821844f5c Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 27 Jul 2021 11:42:47 +0300 Subject: [PATCH 017/162] added missing colons --- .../step16_7-createIndicatorsTables.sql | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index a2fc88a392..f1ebf0d87f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -44,22 +44,22 @@ on p.id= tmp.id; create table indi_project_pubs_count stored as parquet as select pr.id id, count(p.id) total_pubs from project_results pr join publication p on p.id=pr.result -group by pr.id +group by pr.id; create table indi_project_datasets_count stored as parquet as select pr.id id, count(d.id) total_datasets from project_results pr join dataset d on d.id=pr.result -group by pr.id +group by pr.id; create table indi_project_software_count stored as parquet as select pr.id id, count(s.id) total_software from project_results pr join software s on s.id=pr.result -group by pr.id +group by pr.id; create table indi_project_otherresearch_count stored as parquet as select pr.id id, count(o.id) total_other from project_results pr join otherresearchproduct o on o.id=pr.result -group by pr.id +group by pr.id; create table indi_pub_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -76,7 +76,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on p.id=ro.id join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_dataset_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -93,7 +93,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on d.id=ro.id join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_software_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -110,7 +110,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on s.id=ro.id join SOURCER.organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_other_avg_year_country_oa stored as parquet as @@ -128,7 +128,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on orp.id=ro.id join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_pub_avg_year_context_oa stored as parquet as with total as @@ -138,7 +138,7 @@ join publication p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total +from total; create table indi_dataset_avg_year_context_oa stored as parquet as with total as @@ -148,7 +148,7 @@ join dataset p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total +from total; create table indi_software_avg_year_context_oa stored as parquet as with total as @@ -158,7 +158,7 @@ join software p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total +from total; create table indi_other_avg_year_context_oa stored as parquet as with total as @@ -168,7 +168,7 @@ join otherresearchproduct p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofother -from total +from total; create table indi_other_avg_year_content_oa stored as parquet as with total as @@ -179,7 +179,7 @@ join otherresearchproduct p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total +from total; create table indi_software_avg_year_content_oa stored as parquet as with total as @@ -190,7 +190,7 @@ join software p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total +from total; create table indi_dataset_avg_year_content_oa stored as parquet as with total as @@ -201,7 +201,7 @@ join dataset p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total +from total; create table indi_pub_avg_year_content_oa stored as parquet as with total as @@ -212,7 +212,7 @@ join publication p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total +from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -220,7 +220,7 @@ from publication p left outer join (select p.id, license.type as lic from publication p join publication_licenses as license on license.id = p.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id +on p.id= tmp.id; create table indi_pub_has_cc_licence_url stored as parquet as select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url @@ -229,9 +229,8 @@ left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host from publication p join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp -on p.id= tmp.id - +on p.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract -from publication \ No newline at end of file +from publication; \ No newline at end of file From eb07f7f40f76b9a9c0a3f72549908c4432779558 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 27 Jul 2021 12:27:26 +0200 Subject: [PATCH 018/162] Hosted By Map --- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- ...c_cleaning_OPENAIREplus_compliant_hal_orig | 6 +- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +-- ...enaire_datacite_ExchangeLandingpagePid.xsl | 14 +- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../crossref_mapping.csv | 4 +- .../dhp/doiboost/issn_gold_oa_version_4.csv | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 5 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ++++++ .../dhp/oa/graph/hostebymap/Constants.java | 15 ++ .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ++++++++++++ .../SparkPrepareHostedByMapData.scala | 158 ++++++++++++++++++ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ++++++ .../hostebymap/model/UnibiGoldModel.java | 44 +++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 59 +++++++ .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 111 ++++++++++++ .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- .../dhp/oa/graph/hostedbymap/datasource.json | 10 ++ .../graph/hostedbymap/doaj_transformed.json | 25 +++ .../dhp/oa/graph/hostedbymap/unibiGold.csv | 37 ++++ .../graph/hostedbymap/unibi_transformed.json | 29 ++++ .../dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 +- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 ++--- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- 25 files changed, 764 insertions(+), 53 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca4..30138b5bcf 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index 06325810b4..b421f1342a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index d4eb71dc42..505f562941 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index 3cfaec80b1..f80c91a6a2 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca4..30138b5bcf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 6a5fc3f871..0c1fd9e64b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). ","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -ISSN,Array of String,No,,"Publication/Journal/issn +issn,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv index 6d4b6cdcbd..ebde1bf189 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ -1,4 +1,4 @@ -"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" "0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" "0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" "0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 19febb9ed7..665c01276a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,6 +122,11 @@ org.json4s json4s-jackson_2.11 + + com.opencsv + opencsv + 5.5 + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala new file mode 100644 index 0000000000..d984183399 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} + + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (!s1.equals("")){ + return s1} + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java new file mode 100644 index 0000000000..b07e33cd19 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java @@ -0,0 +1,15 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +public class Constants { + + + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java new file mode 100644 index 0000000000..d397886a32 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + + URLConnection connection = new URL(fileURL).openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if(shouldReplace){ + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + + + writer.close(); + in.close(); + if(shouldReplace){ + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + + } + + + + +} + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala new file mode 100644 index 0000000000..55b6e36cd0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala @@ -0,0 +1,158 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} +import org.json4s.jackson.Serialization.write + +import scala.collection.mutable.ListBuffer + +object SparkPrepareHostedByMapData { + + case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {} + + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + implicit val mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo] + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + val openaire: HostedByInfo = input._1._1 + val doaj: HostedByInfo = input._1._2 + val gold: HostedByInfo = input._2 + val isOpenAccess: Boolean = doaj == null && gold == null + + openaire.journal_id match { + case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + + // catch the default with a variable so you can print it + case whoa => return null + } + } + + def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { + implicit val formats = DefaultFormats + val serializedJSON:String = write(input) + + var hostedBy = new ListBuffer[String]() + if(!input.issn.equals("")){ + hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" + } + if(!input.eissn.equals("")){ + hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" + } + if(!input.lissn.equals("")){ + hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" + } + + hostedBy + + } + + + def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = { + spark.read.textFile(input).as[Datasource].flatMap(ds => { + val lst = new ListBuffer[HostedByInfo]() + if (ds.getJournal == null) { + return null + } + val issn: String = ds.getJournal.getIssnPrinted + val issnl: String = ds.getJournal.getIssnOnline + val eissn: String = ds.getJournal.getIssnOnline + val id: String = ds.getId + val officialname: String = ds.getOfficialname.getValue + if (issn != null) { + lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL) + } + if (eissn != null) { + lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN) + } + lst + }).filter(i => i != null) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + val datasourcePath = parser.get("datasourcePath") + val workingDirPath = parser.get("workingPath") + + + + + logger.info("Getting the Datasources") + + val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel] + val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel] + + val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark) + + val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = doaj.getIssn + val eissn: String = doaj.getEissn + val officialname: String = doaj.getJournalTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN) + } + if (eissn != null) { + lst += HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN) + } + lst + }) + + val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = gold.getIssn + val issnl: String = gold.getIssn_l + val officialname: String = gold.getTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL) + } + lst + }) + + Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") + .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) + .filter(i => i != null)) + .flatMap(toHostedByMap) + // .map(i => (i.id,i)) + // .groupByKey(_._1) + // .agg(hostedByAggregator.toColumn) + // .map(p => p._2) + .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") + + + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java new file mode 100644 index 0000000000..fe1d14a763 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn ; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java new file mode 100644 index 0000000000..309f74eea5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import com.opencsv.bean.CsvBindByName; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala new file mode 100644 index 0000000000..657f20fce4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -0,0 +1,59 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import java.sql.Timestamp + +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} +import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +class TestPreprocess { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + @Test + def readDatasource():Unit = { + + + import org.apache.spark.sql.Encoders + implicit val formats = DefaultFormats + import org.json4s.jackson.Serialization.write + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + + val schema = Encoders.product[HostedByInfo] + + spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r))) + +// SparkPrepareHostedByMapData.readOADataset(path, spark) +// .foreach(r => println(write(r))) + + + spark.close() + } + + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java new file mode 100644 index 0000000000..01c70502c7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.junit.jupiter.api.Test; + +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; + +public class TestReadCSV { + + @Test + public void testCSVUnibi() throws FileNotFoundException { + + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); + + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); + + ObjectMapper mapper = new ObjectMapper(); + + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + + } + + @Test + public void testCSVUrlUnibi() throws IOException { + + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> + + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } + + @Test + public void testCSVUrlDOAJ() throws IOException { + + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); + + + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> + + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c8..4db5fd463d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json new file mode 100644 index 0000000000..15e1dcc45a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -0,0 +1,10 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json new file mode 100644 index 0000000000..9cec80eb48 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -0,0 +1,25 @@ +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} +{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} +{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} +{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} +{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} +{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} +{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} +{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} +{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv new file mode 100644 index 0000000000..eb5d93451a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv @@ -0,0 +1,37 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json new file mode 100644 index 0000000000..d4acba4a9c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -0,0 +1,29 @@ +{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} +{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} +{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} +{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} +{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} +{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} +{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} +{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} +{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} +{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} +{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} +{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} +{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} +{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival studies.","title_source":"ROAD"} +{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} +{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} +{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} +{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} +{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} +{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} +{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} +{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} +{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} +{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} +{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} +{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} +{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} +{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} +{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index 95457fb701..b45fa1edba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml @@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 22da07e299..06ebe97cd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca4..30138b5bcf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 1a28a69cac1031bda96929c5a6512f52d8fdda2d Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 27 Jul 2021 15:14:09 +0300 Subject: [PATCH 019/162] changed the citeee in *_citations to cites --- .../graph/stats/oozie_app/scripts/step2.sql | 23 ++----------------- .../graph/stats/oozie_app/scripts/step3.sql | 21 ++--------------- .../graph/stats/oozie_app/scripts/step4.sql | 21 ++--------------- .../graph/stats/oozie_app/scripts/step5.sql | 21 ++--------------- 4 files changed, 8 insertions(+), 78 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 75b24b1893..bb0d0ac6ca 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -90,27 +90,8 @@ FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.publication_citations AS -SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result +SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and p.datainfo.deletedbyinference = false; - --- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + and p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 540cc03a51..953eaad6a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -41,7 +41,7 @@ FROM ${openaire_db_name}.dataset d WHERE d.datainfo.deletedbyinference = FALSE; CREATE TABLE ${stats_db_name}.dataset_citations AS -SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result +SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" @@ -95,21 +95,4 @@ CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; --- --- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 54345e0741..0210dc8cb9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -41,7 +41,7 @@ from ${openaire_db_name}.software s where s.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.software_citations AS -SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT +SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" @@ -95,21 +95,4 @@ CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; --- --- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index 36ad5d92a8..f7b302186f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -41,7 +41,7 @@ WHERE o.datainfo.deletedbyinference = FALSE; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS -SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT +SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and o.datainfo.deletedbyinference = false; @@ -86,21 +86,4 @@ where p.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; - --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file From 5aa7d16d1b1bf2f100081f491152db4fb7ac90a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Jul 2021 15:11:37 +0200 Subject: [PATCH 020/162] updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5b229a6255..c41a6c68c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,17 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.lenient; - -import java.io.IOException; -import java.util.List; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -20,22 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.util.List; +import java.util.Optional; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -74,7 +60,7 @@ public class MappersTest { assertValidId(p.getId()); - assertEquals(1, p.getOriginalId().size()); + assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().contains("10.3897/oneeco.2.e13718")); assertValidId(p.getCollectedfrom().get(0).getKey()); @@ -261,8 +247,8 @@ public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); - assertEquals(1, d.getOriginalId().size()); - assertTrue(d.getOriginalId().contains("oai:zenodo.org:3234526")); + assertEquals(2, d.getOriginalId().size()); + assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); @@ -351,8 +337,11 @@ public class MappersTest { final Publication p = (Publication) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); + //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); @@ -413,7 +402,8 @@ public class MappersTest { assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); assertValidId(d.getId()); - assertTrue(d.getOriginalId().size() == 1); + assertEquals(2, d.getOriginalId().size()); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -663,8 +653,8 @@ public class MappersTest { final Dataset p = (Dataset) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("df76e73f-0483-49a4-a9bb-63f2f985574a", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("df76e73f-0483-49a4-a9bb-63f2f985574a"))); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); From 825d9f02897c07f64880650ee6b58ccb4f3fcf2d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 27 Jul 2021 16:09:30 +0200 Subject: [PATCH 021/162] fixed datacite workflow starting from Importing delta --- .../datacite/DataciteToOAFTransformation.scala | 2 +- .../actionmanager/datacite/oozie_app/workflow.xml | 2 +- .../datacite/DataciteToOAFTest.scala | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 045927bed1..cfdd98d30c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -367,7 +367,7 @@ object DataciteToOAFTransformation { result.setDateofcollection(ISO8601FORMAT.format(d)) - result.setDateoftransformation(ISO8601FORMAT.format(ts)) + result.setDateoftransformation(ISO8601FORMAT.format(d)) result.setDataInfo(dataInfo) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 036178b37b..021704f548 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -16,7 +16,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala index 0d10c41dca..a795a910da 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala @@ -3,13 +3,14 @@ package eu.dnetlib.dhp.actionmanager.datacite import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.SerializationFeature - import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import java.text.SimpleDateFormat +import java.util.Locale import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) @@ -22,6 +23,18 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ super.setUpVocabulary() } + + @Test + def testDateMapping:Unit = { + val inputDate = "2021-07-14T11:52:54+0000" + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val dt = ISO8601FORMAT.parse(inputDate) + println(dt.getTime) + + + } + + @Test def testMapping() :Unit = { val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString From 0424f47494ffcc792fc4d9c7dc4e79407dc105c5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 10:24:13 +0200 Subject: [PATCH 022/162] HostedByMap fixing issues --- .../dhp/oa/graph/hostebymap/Aggregators.scala | 2 +- .../SparkPrepareHostedByMapData.scala | 240 +++++++++++------- .../oa/graph/hostedbymap/TestPreprocess.scala | 85 ++++++- .../dhp/oa/graph/hostedbymap/datasource.json | 20 +- 4 files changed, 234 insertions(+), 113 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala index d984183399..561d2bbf45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala @@ -5,7 +5,7 @@ import org.apache.spark.sql.expressions.Aggregator case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} - +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} object Aggregators { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala index 55b6e36cd0..b66e972818 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala @@ -2,27 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostebymap import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.oa.merge.AuthorMerger -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation} +import eu.dnetlib.dhp.schema.oaf.{Datasource} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import org.json4s.jackson.Serialization.write -import scala.collection.mutable.ListBuffer +import com.fasterxml.jackson.databind.ObjectMapper object SparkPrepareHostedByMapData { - case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {} implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] - implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] - implicit val mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo] + + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { @@ -32,59 +28,144 @@ object SparkPrepareHostedByMapData { val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => return null + case whoa => null } } - def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { - implicit val formats = DefaultFormats - val serializedJSON:String = write(input) +// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { +// implicit val formats = DefaultFormats +// val serializedJSON:String = write(input) +// +// var hostedBy = new ListBuffer[String]() +// if(!input.issn.equals("")){ +// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" +// } +// if(!input.eissn.equals("")){ +// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" +// } +// if(!input.lissn.equals("")){ +// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" +// } +// +// hostedBy +// +// } - var hostedBy = new ListBuffer[String]() + def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { + if(issn != null){ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, issn, eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, issn, "", issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, "", "" , oa) + } + } + }else{ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, "", eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, "", eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, "", "", issnl , oa) + }else{ + HostedByItemType("", "", "", "", "" , oa) + } + } + } + } + + def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { + if (dats.getJournal != null) { + + return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) + } + HostedByItemType("","","","","",false) + } + + def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + + import spark.implicits._ + + + val mapper = new ObjectMapper() + + implicit var encoderD = Encoders.kryo[Datasource] + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + + def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true) + } + + + def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) + + dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { + + return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) + } + + def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[DOAJModel])) + + dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { + var lst : List[(String, HostedByItemType)] = List() if(!input.issn.equals("")){ - hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" + lst = (input.issn, input) :: lst } if(!input.eissn.equals("")){ - hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" + lst = (input.eissn, input) :: lst } if(!input.lissn.equals("")){ - hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" + lst = (input.lissn, input) :: lst } - - hostedBy - + lst } - def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = { - spark.read.textFile(input).as[Datasource].flatMap(ds => { - val lst = new ListBuffer[HostedByInfo]() - if (ds.getJournal == null) { - return null - } - val issn: String = ds.getJournal.getIssnPrinted - val issnl: String = ds.getJournal.getIssnOnline - val eissn: String = ds.getJournal.getIssnOnline - val id: String = ds.getId - val officialname: String = ds.getOfficialname.getValue - if (issn != null) { - lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN) - } - if (issnl != null) { - lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL) - } - if (eissn != null) { - lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN) - } - lst - }).filter(i => i != null) - } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -105,53 +186,34 @@ object SparkPrepareHostedByMapData { + implicit val formats = DefaultFormats + logger.info("Getting the Datasources") - val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel] - val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel] + // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark) - - val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => { - val lst = new ListBuffer[HostedByInfo]() - val issn: String = doaj.getIssn - val eissn: String = doaj.getEissn - val officialname: String = doaj.getJournalTitle - if (issn != null) { - lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN) - } - if (eissn != null) { - lst += HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN) - } - lst - }) - - val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => { - val lst = new ListBuffer[HostedByInfo]() - val issn: String = gold.getIssn - val issnl: String = gold.getIssn_l - val officialname: String = gold.getTitle - if (issn != null) { - lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN) - } - if (issnl != null) { - lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL) - } - lst - }) - - Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") - .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) - .filter(i => i != null)) - .flatMap(toHostedByMap) - // .map(i => (i.id,i)) - // .groupByKey(_._1) - // .agg(hostedByAggregator.toColumn) - // .map(p => p._2) - .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") + val dats : Dataset[HostedByItemType] = + oaHostedByDataset(spark, datasourcePath) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + dats.flatMap(hbi => toList(hbi)) + .groupByKey(_._1) +// +// + +// + +// +// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") +// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) +// .filter(i => i != null)) +// .flatMap(toHostedByMap) +// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") +// +// } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 657f20fce4..8e5657bfc1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -2,22 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import java.sql.Timestamp -import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData -import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, SparkSession} -import org.codehaus.jackson.map.ObjectMapper +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test import org.slf4j.{Logger, LoggerFactory} +import scala.collection.mutable.ListBuffer import scala.io.Source -class TestPreprocess { +class TestPreprocess extends java.io.Serializable{ - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource] + implicit val schema = Encoders.product[HostedByInfo] @@ -27,7 +28,11 @@ class TestPreprocess { import org.apache.spark.sql.Encoders implicit val formats = DefaultFormats - import org.json4s.jackson.Serialization.write + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + val conf = new SparkConf() conf.setMaster("local[*]") @@ -41,12 +46,67 @@ class TestPreprocess { val path = getClass.getResource("datasource.json").getPath - val schema = Encoders.product[HostedByInfo] + println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) - spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r))) -// SparkPrepareHostedByMapData.readOADataset(path, spark) -// .foreach(r => println(write(r))) + + spark.close() + } + + + @Test + def readGold():Unit = { + + implicit val formats = DefaultFormats + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("unibi_transformed.json").getPath + + + println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + + + + spark.close() + } + + @Test + def readDoaj():Unit = { + + implicit val formats = DefaultFormats + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("doaj_transformed.json").getPath + + + println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + spark.close() @@ -55,5 +115,4 @@ class TestPreprocess { - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 15e1dcc45a..818aaa7167 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,10 +1,10 @@ -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file From 16c91203bd434664fdb8b8a3633fb683880039c5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 10:30:49 +0200 Subject: [PATCH 023/162] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 13 +-- .../datacite/ImportDatacite.scala | 2 +- .../sx/provision/SparkCreateActionset.scala | 90 +++++++++++++++++++ .../dhp/sx/provision/SparkSaveActionSet.scala | 86 ++++++++++++++++++ .../dhp/sx/actionset/generate_actionset.json | 6 ++ .../sx/actionset/oozie_app/config-default.xml | 23 +++++ .../dhp/sx/actionset/oozie_app/workflow.xml | 76 ++++++++++++++++ .../dhp/sx/actionset/save_actionset.json | 5 ++ 8 files changed, 295 insertions(+), 6 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 8df2032830..823187afe6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.actionmanager.datacite import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest} import org.apache.http.entity.StringEntity -import org.apache.http.impl.client.HttpClients +import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException @@ -56,11 +57,15 @@ abstract class AbstractRestClient extends Iterator[String]{ private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ - val client = HttpClients.createDefault + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 try { while (tries > 0) { - println(s"requesting ${r.getURI}") val response = client.execute(r) println(s"get response with status${response.getStatusLine.getStatusCode}") @@ -80,7 +85,5 @@ abstract class AbstractRestClient extends Iterator[String]{ throw new RuntimeException("Unable to close client ", e) } } - getBufferData() - } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala index 931ac06f64..2b73d29559 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala @@ -140,7 +140,7 @@ object ImportDatacite { private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = { var from:Long = timestamp * 1000 - val delta:Long = 50000000L + val delta:Long = 100000000L var client: DataciteAPIImporter = null val now :Long =System.currentTimeMillis() var i = 0 diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala new file mode 100644 index 0000000000..6f0cdcf8aa --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -0,0 +1,90 @@ +package eu.dnetlib.dhp.sx.provision + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import org.apache.spark.{SparkConf, sql} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +object SparkCreateActionset { + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString) + parser.parseArgument(args) + + + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val workingDirFolder = parser.get("workingDirFolder") + log.info(s"workingDirFolder -> $workingDirFolder") + + implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoders:Encoder[Result] = Encoders.kryo[Result] + implicit val relationEncoders:Encoder[Relation] = Encoders.kryo[Relation] + + import spark.implicits._ + + val relation = spark.read.load(s"$sourcePath/relation").as[Relation] + + relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.save(s"$workingDirFolder/id_relation") + + + val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] + + log.info("extract source and target Identifier involved in relations") + + + log.info("save relation filtered") + + relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving publication") + + val publication:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/publication").as[Result].map(p => (p.getId, p)) + + publication + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving dataset") + val dataset:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/dataset").as[Result].map(p => (p.getId, p)) + dataset + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving software") + val software:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/software").as[Result].map(p => (p.getId, p)) + software + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving Other Research product") + val orp:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/otherresearchproduct").as[Result].map(p => (p.getId, p)) + orp + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala new file mode 100644 index 0000000000..d1d0b84242 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala @@ -0,0 +1,86 @@ +package eu.dnetlib.dhp.sx.provision + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.action.AtomicAction +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import org.apache.hadoop.io.Text +import org.apache.hadoop.io.compress.GzipCodec +import org.apache.hadoop.mapred.SequenceFileOutputFormat +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +object SparkSaveActionSet { + + + def toActionSet(item: Oaf): (String, String) = { + val mapper = new ObjectMapper() + + item match { + case dataset: OafDataset => + val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] + a.setClazz(classOf[OafDataset]) + a.setPayload(dataset) + (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case publication: Publication => + val a: AtomicAction[Publication] = new AtomicAction[Publication] + a.setClazz(classOf[Publication]) + a.setPayload(publication) + (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case software: Software => + val a: AtomicAction[Software] = new AtomicAction[Software] + a.setClazz(classOf[Software]) + a.setPayload(software) + (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case orp: OtherResearchProduct => + val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] + a.setClazz(classOf[OtherResearchProduct]) + a.setPayload(orp) + (orp.getClass.getCanonicalName, mapper.writeValueAsString(a)) + + case relation: Relation => + val a: AtomicAction[Relation] = new AtomicAction[Relation] + a.setClazz(classOf[Relation]) + a.setPayload(relation) + (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case _ => + null + } + + } + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString) + parser.parseArgument(args) + + + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) + + spark.read.load(sourcePath).as[Oaf] + .map(o =>toActionSet(o)) + .filter(o => o!= null) + .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) + + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json new file mode 100644 index 0000000000..0563808ea8 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json @@ -0,0 +1,6 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingDirFolder","paramDescription": "the working Dir Folder", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml new file mode 100644 index 0000000000..dd3c32c620 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml @@ -0,0 +1,23 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml new file mode 100644 index 0000000000..ef86a17728 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -0,0 +1,76 @@ + + + + sourcePath + the path of the consistent graph + + + workingDirFolder + the path of working dir ActionSet + + + outputPath + the path of Scholexplorer ActionSet + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + Create Action Set + eu.dnetlib.dhp.sx.provision.SparkCreateActionset + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --targetPath${outputPath} + --workingDirFolder${workingDirFolder} + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Save Action Set + eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${workingDirFolder}/actionSetOaf + --targetPath${outputPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json new file mode 100644 index 0000000000..0264c825f0 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true} +] \ No newline at end of file From 43e62fcae92b9c955201d0b2f493d22cc8b32744 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:04:55 +0200 Subject: [PATCH 024/162] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4193 --- .../dnetlib/doiboost/DoiBoostMappingUtil.scala | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f15..d018948fc2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -179,20 +179,6 @@ object DoiBoostMappingUtil { } - //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - - - - // val pub_date = LocalDate.parse(date, formatter) - -// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ -// val oaq : AccessRight = getOpenAccessQualifier() -// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) -// return oaq -// } -// else{ -// return getEmbargoedAccessQualifier() -// } } return getClosedAccessQualifier() @@ -206,12 +192,12 @@ object DoiBoostMappingUtil { } def getRestrictedQualifier():AccessRight = { - OafMapperUtils.accessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight( "RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getUnknownQualifier():AccessRight = { - OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } From 5fe016dcbccda43276342efd1547276d723ff5db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:14:28 +0200 Subject: [PATCH 025/162] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4194 --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index d018948fc2..ea65fc747b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -188,7 +188,8 @@ object DoiBoostMappingUtil { def getOpenAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("OPEN","Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + + OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getRestrictedQualifier():AccessRight = { From 80d5b3b4deb5098ba85dd701a35cfecf82b28ef9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:16:49 +0200 Subject: [PATCH 026/162] DoiBoost AccessRigh #4362 - removing commented code --- .../eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index ea65fc747b..e688804337 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -236,8 +236,7 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) + } else { hb = ModelConstants.UNKNOWN_REPOSITORY @@ -246,17 +245,7 @@ object DoiBoostMappingUtil { }) publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) -// val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) -// if (ar.nonEmpty) { -// if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// else { -// val ar = getRestrictedQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// } + publication } From 3d2bba3d5d6f346d3fed3fc80511e9bed78fa92c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:25:43 +0200 Subject: [PATCH 027/162] removing not needed classes --- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 ----- .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ------------------ 2 files changed, 188 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java deleted file mode 100644 index 00b6b184ba..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.doiboost; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { - - log.info("Getting CSV file..."); - readCSV.execute(classForName); - - } - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java deleted file mode 100644 index e5bd49adae..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java +++ /dev/null @@ -1,151 +0,0 @@ - -package eu.dnetlib.doiboost; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - private String ISSN; - private String ISSN_L; - private String ISSN_IN_DOAJ; - private String ISSN_IN_ROAD; - private String ISSN_IN_PMC; - private String ISSN_IN_OAPC; - private String ISSN_IN_WOS; - private String ISSN_IN_SCOPUS; - private String JOURNAL_IN_DOAJ; - private String JOURNAL_IN_ROAD; - private String JOURNAL_IN_PMC; - private String JOURNAL_IN_OAPC; - private String JOURNAL_IN_WOS; - private String JOURNAL_IN_SCOPUS; - private String TITLE; - private String TITLE_SOURCE; - - public String getISSN() { - return ISSN; - } - - public void setISSN(String ISSN) { - this.ISSN = ISSN; - } - - public String getISSN_L() { - return ISSN_L; - } - - public void setISSN_L(String ISSN_L) { - this.ISSN_L = ISSN_L; - } - - public String getISSN_IN_DOAJ() { - return ISSN_IN_DOAJ; - } - - public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { - this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; - } - - public String getISSN_IN_ROAD() { - return ISSN_IN_ROAD; - } - - public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { - this.ISSN_IN_ROAD = ISSN_IN_ROAD; - } - - public String getISSN_IN_PMC() { - return ISSN_IN_PMC; - } - - public void setISSN_IN_PMC(String ISSN_IN_PMC) { - this.ISSN_IN_PMC = ISSN_IN_PMC; - } - - public String getISSN_IN_OAPC() { - return ISSN_IN_OAPC; - } - - public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { - this.ISSN_IN_OAPC = ISSN_IN_OAPC; - } - - public String getISSN_IN_WOS() { - return ISSN_IN_WOS; - } - - public void setISSN_IN_WOS(String ISSN_IN_WOS) { - this.ISSN_IN_WOS = ISSN_IN_WOS; - } - - public String getISSN_IN_SCOPUS() { - return ISSN_IN_SCOPUS; - } - - public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { - this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; - } - - public String getJOURNAL_IN_DOAJ() { - return JOURNAL_IN_DOAJ; - } - - public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { - this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; - } - - public String getJOURNAL_IN_ROAD() { - return JOURNAL_IN_ROAD; - } - - public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { - this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; - } - - public String getJOURNAL_IN_PMC() { - return JOURNAL_IN_PMC; - } - - public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { - this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; - } - - public String getJOURNAL_IN_OAPC() { - return JOURNAL_IN_OAPC; - } - - public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { - this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; - } - - public String getJOURNAL_IN_WOS() { - return JOURNAL_IN_WOS; - } - - public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { - this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; - } - - public String getJOURNAL_IN_SCOPUS() { - return JOURNAL_IN_SCOPUS; - } - - public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { - this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; - } - - public String getTITLE() { - return TITLE; - } - - public void setTITLE(String TITLE) { - this.TITLE = TITLE; - } - - public String getTITLE_SOURCE() { - return TITLE_SOURCE; - } - - public void setTITLE_SOURCE(String TITLE_SOURCE) { - this.TITLE_SOURCE = TITLE_SOURCE; - } -} From 4a9741825d3afd1792ebd8881ba82dde53962328 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 12:28:04 +0300 Subject: [PATCH 028/162] added result_orcid, result_project provenance, issn in datasources --- .../graph/stats/oozie_app/scripts/step13.sql | 20 ++++++++++--------- .../graph/stats/oozie_app/scripts/step15.sql | 11 +--------- .../graph/stats/oozie_app/scripts/step6.sql | 2 +- .../graph/stats/oozie_app/scripts/step7.sql | 9 ++------- .../graph/stats/oozie_app/scripts/step8.sql | 19 +++++++----------- 5 files changed, 22 insertions(+), 39 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index d79396b3bc..e4e81175cc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -57,12 +57,14 @@ UNION ALL SELECT * FROM ${stats_db_name}.software_sources UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; --- --- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + + +create table ${stats_db_name}.result_orcid as +select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid +from ( + SELECT substr(res.id, 4) as id, auth_pid.value as orcid + FROM ${openaire_db_name}.result res + LATERAL VIEW explode(author) a as auth + LATERAL VIEW explode(auth.pid) ap as auth_pid + LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type + WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8f364d7478..8e66e05c02 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -33,13 +33,4 @@ select * from ${stats_db_name}.dataset_refereed union all select * from ${stats_db_name}.software_refereed union all -select * from ${stats_db_name}.otherresearchproduct_refereed; --- --- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +select * from ${stats_db_name}.otherresearchproduct_refereed; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 5d81e97bb9..4cbdba9317 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -13,7 +13,7 @@ WHERE r.reltype = 'projectOrganization' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.project_results AS -SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result +SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.datainfo.deletedbyinference = false; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index ae540b9b23..b3cbc9b419 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -130,12 +130,7 @@ WHERE r.reltype = 'resultOrganization' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.result_projects AS -select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend +select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id = pr.result - JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; - --- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index de0fedd7e2..5d770dd617 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -17,7 +17,9 @@ CREATE TABLE ${stats_db_name}.datasource_tmp `latitude` STRING, `longitude` STRING, `websiteurl` STRING, - `compatibility` STRING + `compatibility` STRING, + issn_printed STRING, + issn_online STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); -- Insert statement that takes into account the piwik_id of the openAIRE graph @@ -32,7 +34,9 @@ SELECT substr(d1.id, 4) AS id, d1.latitude.value AS latitude, d1.longitude.value AS longitude, d1.websiteurl.value AS websiteurl, - d1.openairecompatibility.classid AS compatibility + d1.openairecompatibility.classid AS compatibility, + d1.journal.issnprinted AS issn_printed, + d1.journal.issnonline AS issn_online FROM ${openaire_db_name}.datasource d1 LEFT OUTER JOIN (SELECT id, split(originalidd, '\\:')[1] as piwik_id @@ -97,13 +101,4 @@ where d.datainfo.deletedbyinference = false; CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result -FROM ${stats_db_name}.result_datasources; - --- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +FROM ${stats_db_name}.result_datasources; \ No newline at end of file From 2fff24df55f2bafd2b0f67837d2f266dcf934fa8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 11:34:19 +0200 Subject: [PATCH 029/162] code formatting --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b94422..58009bfcfc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c0..63f18a803a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d23718..2c82402900 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From c806387d4bfa74491375afbc80c39d67547ce9f6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 20 Jul 2021 19:31:43 +0200 Subject: [PATCH 030/162] tests for enermaps --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 25 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/enermaps.xml | 72 +++++++++++++++++++ .../oa/provision/XmlRecordFactoryTest.java | 29 ++++++++ .../eu/dnetlib/dhp/oa/provision/enermaps.json | 1 + 4 files changed, 127 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803a..fb4a5b5da9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -559,6 +559,31 @@ public class MappersTest { assertNotNull(d.getInstance().get(0).getUrl()); } + @Test + void testEnermaps() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(1, d.getInstance().size()); + assertNotNull(d.getInstance().get(0).getUrl()); + assertNotNull(d.getContext()); + assertTrue(StringUtils.isNotBlank(d.getContext().get(0).getId())); + assertEquals("enermaps::selection::tgs00004", d.getContext().get(0).getId()); + } + @Test void testClaimFromCrossref() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml new file mode 100644 index 0000000000..362b40c85c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml @@ -0,0 +1,72 @@ + + + + enermaps____::04149ee428d07360314c2cb3ba95d41e + tgs00004 + 2021-07-20T18:43:12.096+02:00 + enermaps____ + + + + https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004 + + + Statistical Office of the European Union (Eurostat) + + + + + Regional GDP + + + Statistical Office of the European Union (Eurostat) + 2020 + + 2020-10-07 + + + + OPEN + Creative Commons Attribution 4.0 International + + + GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy. + + 0021 + 2020-10-07 + OPEN + Creative Commons Attribution 4.0 International + + + + + + + + + https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da6..a5a1563aa0 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,6 +7,8 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; +import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -131,4 +133,31 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } + + @Test + public void testEnermapsRecord() throws IOException, DocumentException { + + String contextmap = "" + + ""+ + ""+ + ""; + + ContextMapper contextMapper = ContextMapper.fromXml(contextmap); + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Dataset d = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + + JoinedEntity je = new JoinedEntity<>(d); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id")); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json new file mode 100644 index 0000000000..dcd4c2ee17 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626800904248,"id":"50|enermaps____::04149ee428d07360314c2cb3ba95d41e","originalId":["50|enermaps____::04149ee428d07360314c2cb3ba95d41e","tgs00004"],"pid":[],"dateofcollection":"2021-07-20T18:43:12.096+02:00","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-07-20T18:43:12.096+02:00","altered":true,"baseURL":"https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"author":[{"fullname":"Statistical Office of the European Union (Eurostat)","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"\n Regional GDP\n ","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020-10-07","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Statistical Office of the European Union (Eurostat)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"enermaps::selection::tgs00004","dataInfo":[{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":{"value":"Creative Commons Attribution 4.0 International","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004"],"distributionlocation":null,"collectedfrom":{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"storagedate":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} From df8715a1ecc57b2221a960526700f53d1f6cb676 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 28 Jul 2021 11:58:26 +0200 Subject: [PATCH 031/162] format code after mvn compile --- .../dhp/oa/provision/XmlRecordFactoryTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a5a1563aa0..221049f903 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,8 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; -import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -23,8 +21,10 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -137,17 +137,18 @@ public class XmlRecordFactoryTest { @Test public void testEnermapsRecord() throws IOException, DocumentException { - String contextmap = "" + - ""+ - ""+ - ""; + String contextmap = "" + + + "" + + "" + + ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); Dataset d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); JoinedEntity je = new JoinedEntity<>(d); From 9b181ffa73c6770a97caa9218c747e4d392d98ec Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 16:31:29 +0300 Subject: [PATCH 032/162] added the h2020 classification scheme for projects --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 4cbdba9317..0c4a767a4e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -18,6 +18,12 @@ FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.datainfo.deletedbyinference = false; +create table ${stats_db_name}.project_classification as +select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 +from ${openaire_db_name}project p + lateral view explode(p.h2020classification) classifs as class +where p.datainfo.deletedbyinference=false and class.h2020programme is not null; + CREATE TABLE ${stats_db_name}.project_tmp ( id STRING, From 3d8f0f629b3ff2c9d2b7401b7e62381f1547531c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 16:15:15 +0200 Subject: [PATCH 033/162] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 32 ++++++++--------- dhp-workflows/dhp-graph-provision/pom.xml | 35 +++++++++++++++++++ .../sx/provision/SparkCreateActionset.scala | 31 ++++------------ .../dhp/sx/actionset/oozie_app/workflow.xml | 8 ++--- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 823187afe6..92a870e37a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -64,26 +64,24 @@ abstract class AbstractRestClient extends Iterator[String]{ .setSocketTimeout(timeout * 1000).build() val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 - try { - while (tries > 0) { + while (tries > 0) { println(s"requesting ${r.getURI}") - val response = client.execute(r) - println(s"get response with status${response.getStatusLine.getStatusCode}") - if (response.getStatusLine.getStatusCode > 400) { - tries -= 1 + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries-=1 } - else - return IOUtils.toString(response.getEntity.getContent) } "" - } catch { - case e: Throwable => - throw new RuntimeException("Error on executing request ", e) - } finally try client.close() - catch { - case e: IOException => - throw new RuntimeException("Unable to close client ", e) - } - } + } getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index c279436d79..e402d06004 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -9,6 +9,41 @@ dhp-graph-provision + + + + net.alchim31.maven + scala-maven-plugin + 4.0.1 + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + -Xmax-classfile-name + 200 + + ${scala.version} + + + + + + diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala index 6f0cdcf8aa..faf386d257 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -43,7 +43,7 @@ object SparkCreateActionset { val relation = spark.read.load(s"$sourcePath/relation").as[Relation] relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.save(s"$workingDirFolder/id_relation") + .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -56,35 +56,18 @@ object SparkCreateActionset { relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") - log.info("saving publication") + log.info("saving entities") - val publication:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/publication").as[Result].map(p => (p.getId, p)) + val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) - publication - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + + entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + entities + .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) .map(p => p._1._2) .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving dataset") - val dataset:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/dataset").as[Result].map(p => (p.getId, p)) - dataset - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving software") - val software:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/software").as[Result].map(p => (p.getId, p)) - software - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - - log.info("saving Other Research product") - val orp:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/otherresearchproduct").as[Result].map(p => (p.getId, p)) - orp - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index ef86a17728..7c4b3dd269 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -14,7 +14,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -42,7 +42,7 @@ --workingDirFolder${workingDirFolder} --masteryarn-cluster - + @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} From 6dddad86ee10543a2d96e3fe7a555bd287492e0c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 16:21:29 +0200 Subject: [PATCH 034/162] [cleaning] title cleaning based on the me.xuender:unidecode library --- dhp-common/pom.xml | 5 ++ .../oaf/utils/GraphCleaningFunctions.java | 19 ++--- .../schema/oaf/utils/OafMapperUtilsTest.java | 25 +++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 26 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml | 70 +++++++++++++++++++ pom.xml | 5 ++ 6 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf357..4c7810c47c 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -25,6 +25,11 @@ com.github.sisyphsu dateparser + + me.xuender + unidecode + + org.apache.spark spark-core_2.11 diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index e5181b1119..1d002ed7ec 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -7,22 +7,19 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.jetbrains.annotations.NotNull; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import me.xuender.unidecode.Unidecode; public class GraphCleaningFunctions extends CleaningFunctions { @@ -194,11 +191,15 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter( - sp -> sp - .getValue() - .toLowerCase() - .replaceAll(TITLE_FILTER_REGEX, "") - .length() > TITLE_FILTER_RESIDUAL_LENGTH) + sp -> { + final String title = sp + .getValue() + .toLowerCase(); + final String residual = Unidecode + .decode(title) + .replaceAll(TITLE_FILTER_REGEX, ""); + return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH; + }) .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index eefa1e9a31..8d519a93f6 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; -import java.util.Locale; -import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -19,13 +15,32 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import me.xuender.unidecode.Unidecode; public class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test + public void testUnidecode() { + + assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ")); + assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛")); + assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼")); + assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい")); + assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի")); + assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики")); + assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ")); + assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης")); + assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية")); + assertEquals("abc def ghi", Unidecode.decode("abc def ghi")); + } + @Test public void testDateValidation() { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803a..ba4211a3f2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.raw; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; @@ -640,6 +642,30 @@ public class MappersTest { System.out.println(p.getTitle().get(0).getValue()); } + @Test + void testJairo() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + + assertNotNull(p.getTitle()); + assertFalse(p.getTitle().isEmpty()); + assertTrue(p.getTitle().size() == 1); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + + final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + + assertNotNull(p_cleaned.getTitle()); + assertFalse(p_cleaned.getTitle().isEmpty()); + } + @Test void testOdfFromHdfs() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml new file mode 100644 index 0000000000..9ec696256f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml @@ -0,0 +1,70 @@ + + +
+ jairo_______::000012e58ed836576ef2a0d38b0f726f + oai:irdb.nii.ac.jp:01221:0000010198 + + + + + + 2021-05-10T11:31:09.424Z + 2021-06-03T01:45:42.536Z + jairo_______ +
+ + 多項式GCDを用いた復号法に関する研究 + 上原, 剛 + 甲斐, 博 + 野田, 松太郎 + application/pdf + http://hdl.handle.net/2433/25934 + jpn + 京都大学数理解析研究所 + 410 + Departmental Bulletin Paper + 0014 + 2004-10-01 + + openaire____::554c7c2873 + OPEN + + + 2433/25934 + AN00061013 + http://hdl.handle.net/2433/25934 + http://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/25934/1/1395-16.pdf + 数理解析研究所講究録 + + + + + https%3A%2F%2Firdb.nii.ac.jp%2Foai + oai:irdb.nii.ac.jp:01221:0000010198 + 2021-04-13T13:36:29Z + + + http://repository.kulib.kyoto-u.ac.jp/dspace-oai/request + oai:repository.kulib.kyoto-u.ac.jp:2433/25934 + 2012-07-12T14:15:41Z + http://irdb.nii.ac.jp/oai + + + + + false + false + 0.9 + + + + +
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 6e4526e41c..fc4a8a21b9 100644 --- a/pom.xml +++ b/pom.xml @@ -205,6 +205,11 @@ dateparser 1.0.7
+ + me.xuender + unidecode + 0.0.7 + com.google.guava From 4c5a71ba2f968e4a6eaf993eb2590306f92ad8d5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 17:11:18 +0200 Subject: [PATCH 035/162] [broker] updated relation descriptors, making use of constant values --- .../EnrichMissingDatasetIsReferencedBy.java | 3 ++- .../EnrichMissingDatasetIsRelatedTo.java | 3 ++- .../EnrichMissingDatasetIsSupplementedBy.java | 3 ++- .../EnrichMissingDatasetIsSupplementedTo.java | 3 ++- .../EnrichMissingDatasetReferences.java | 3 ++- .../EnrichMissingPublicationIsReferencedBy.java | 3 ++- .../EnrichMissingPublicationIsRelatedTo.java | 3 ++- .../EnrichMissingPublicationIsSupplementedBy.java | 3 ++- .../EnrichMissingPublicationIsSupplementedTo.java | 3 ++- .../EnrichMissingPublicationReferences.java | 3 ++- .../eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 13 +++++++------ 11 files changed, 27 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java index 21786687ee..bcbcf755f9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDat @Override protected boolean filterByType(final String relType) { - return relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java index 0f3739434e..4125974ce1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDatase @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java index cde227feed..480daf6661 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java index 750165ff5a..97b1eb8bdb 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java index b1c0afe16f..0978486a37 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java index eebb5c1a66..ff9155c9d0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissin @Override protected boolean filterByType(final String relType) { - return relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java index a8aa550d44..1051559c9e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPu @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java index 762ac942e4..d97f46f093 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java index fc7196a015..b33b340e33 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java index da19944549..fe0f96b6e5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPub @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index c7be633a9d..f578548fb6 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -52,15 +53,15 @@ public class ClusterUtils { } public static boolean isDedupRoot(final String id) { - return id.contains("dedup_wf_"); + return id.contains("dedup"); } public static final boolean isValidResultResultClass(final String s) { - return s.equals("isReferencedBy") - || s.equals("isRelatedTo") - || s.equals("references") - || s.equals("isSupplementedBy") - || s.equals("isSupplementedTo"); + return s.equals(ModelConstants.IS_REFERENCED_BY) + || s.equals(ModelConstants.IS_RELATED_TO) + || s.equals(ModelConstants.REFERENCES) + || s.equals(ModelConstants.IS_SUPPLEMENTED_BY) + || s.equals(ModelConstants.IS_SUPPLEMENT_TO); } public static T incrementAccumulator(final T o, final LongAccumulator acc) { From 3d1580fa9b81fec1a066d74e91de122d519099da Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 18:50:31 +0300 Subject: [PATCH 036/162] fixed a typo --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 0c4a767a4e..378e0f17be 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -20,7 +20,7 @@ WHERE r.reltype = 'resultProject' create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 -from ${openaire_db_name}project p +from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class where p.datainfo.deletedbyinference=false and class.h2020programme is not null; From 4afa5215a9ad70c86bb4bd0db9ce9fc039e4ed2a Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 21:59:12 +0300 Subject: [PATCH 037/162] fixed a NPE? --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 5d770dd617..76d31eb5e9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -55,7 +55,7 @@ CREATE TABLE ${stats_db_name}.dual INSERT INTO ${stats_db_name}.dual VALUES ('X'); INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, - `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`) + `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`) SELECT 'other', 'Other', 'Repository', @@ -66,7 +66,9 @@ SELECT 'other', NULL, NULL, NULL, - 'unknown' + 'unknown', + null, + null FROM ${stats_db_name}.dual WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); DROP TABLE ${stats_db_name}.dual; From 3721df7aa6d54bf20c1c709a1dcd3be3d8dc3af4 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 10:45:24 +0200 Subject: [PATCH 038/162] refactoring create actionset of scholexplorer, moved on package dhp-aggregation --- .../scholix}/SparkCreateActionset.scala | 22 +++++++++---------- .../scholix}/SparkSaveActionSet.scala | 14 ++++++------ .../dhp/sx/actionset/generate_actionset.json | 0 .../sx/actionset/oozie_app/config-default.xml | 0 .../dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- .../dhp/sx/actionset/save_actionset.json | 0 6 files changed, 20 insertions(+), 20 deletions(-) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkCreateActionset.scala (63%) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkSaveActionSet.scala (86%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml (95%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json (100%) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 63% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala index faf386d257..b78f411ee1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import org.apache.spark.{SparkConf, sql} -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.SparkConf +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -34,16 +34,16 @@ object SparkCreateActionset { val workingDirFolder = parser.get("workingDirFolder") log.info(s"workingDirFolder -> $workingDirFolder") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoders:Encoder[Result] = Encoders.kryo[Result] - implicit val relationEncoders:Encoder[Relation] = Encoders.kryo[Relation] + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result] + implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation] - import spark.implicits._ + import spark.implicits._ val relation = spark.read.load(s"$sourcePath/relation").as[Relation] - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -53,12 +53,12 @@ object SparkCreateActionset { log.info("save relation filtered") - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") log.info("saving entities") - val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) + val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 86% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index d1d0b84242..1df7ea3fb1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat @@ -73,13 +73,13 @@ object SparkSaveActionSet { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) spark.read.load(sourcePath).as[Oaf] - .map(o =>toActionSet(o)) - .filter(o => o!= null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) + .map(o => toActionSet(o)) + .filter(o => o != null) + .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 7c4b3dd269..8c045fcfe7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json From 908f57a4758b2aa2ff1c93cad41fcf2328c73bbe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 10:49:39 +0200 Subject: [PATCH 039/162] code formatting --- .../main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index f578548fb6..7c4ca1d22f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,7 +5,6 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -18,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class ClusterUtils { From e87e1805c4280e9d8ed9be9f733d331401273118 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 12:13:06 +0200 Subject: [PATCH 040/162] [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset --- .../graph/raw/PatchRelationsApplication.java | 115 ++++++++++++++++++ .../graph/raw/common/RelationIdMapping.java | 24 ++++ .../oa/graph/patch_relations_parameters.json | 26 ++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 47 ++++++- 4 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java new file mode 100644 index 0000000000..c2bcf69f09 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.oa.graph.raw; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PatchRelationsApplication { + + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional.ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new) + )); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); + + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); + + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); + + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } + + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + + final String relationPath = graphBasePath + "/relation"; + + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + + rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)) + .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark.read().textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java new file mode 100644 index 0000000000..f251da8c33 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class RelationIdMapping { + + private String oldId; + + private String newId; + + public String getOldId() { + return oldId; + } + + public void setOldId(final String oldId) { + this.oldId = oldId; + } + + public String getNewId() { + return newId; + } + + public void setNewId(final String newId) { + this.newId = newId; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json new file mode 100644 index 0000000000..178c2d69bf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "g", + "paramLongName": "graphBasePath", + "paramDescription": "base graph path providing the set of relations to patch", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "intermediate storage location", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "idMappingPath", + "paramDescription": "dataset providing the old -> new identifier mapping", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 7f1ecb39fb..e7320de3b3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -100,6 +100,16 @@ a blacklist of nsprefixes (comma separeted) + + shouldPatchRelations + false + activates the relation patching phase, driven by the content in ${idMappingPath} + + + idMappingPath + + path pointing to the relations identifiers mapping dataset + sparkDriverMemory memory for driver process @@ -538,7 +548,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${workingDir}/graph_raw + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + From 5d08ad86ae45478db3742fef51c7c0ae38f30e34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:03:16 +0200 Subject: [PATCH 041/162] [raw_all] patching relation identifier phase to be run at the end, i.e. includes also claimed relations --- .../oa/graph/raw_all/oozie_app/workflow.xml | 75 +++++++++---------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index e7320de3b3..321ca40909 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -548,42 +548,7 @@ - - - - - - ${(shouldPatchRelations eq "true") and - (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} - - - - - - - - yarn - cluster - PatchRelations - eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores ${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --graphBasePath${workingDir}/graph_raw - --workingDir${workingDir}/patch_relations - --idMappingPath${idMappingPath} - - - - + @@ -596,7 +561,6 @@ - yarn @@ -805,7 +769,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${graphOutputPath} + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + \ No newline at end of file From baad01cadcc499909d68651ff03a60213b7b8c76 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Jul 2021 13:04:39 +0200 Subject: [PATCH 042/162] hostedbymap --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../doiboost/DoiBoostMappingUtil.scala | 2 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ------ .../dhp/oa/graph/hostebymap/Constants.java | 15 -- .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ------------ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ------ .../hostebymap/model/UnibiGoldModel.java | 44 ----- .../oa/graph/hostedbymap/Aggregators.scala | 97 ++++++++++ .../dhp/oa/graph/hostedbymap/Constants.java | 13 ++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 107 +++++++++++ .../SparkProduceHostedByMap.scala} | 107 ++++++----- .../oa/graph/hostedbymap/model/DOAJModel.java | 52 ++++++ .../hostedbymap/model/UnibiGoldModel.java | 45 +++++ .../hostedbymap/download_csv_parameters.json | 37 ++++ .../oa/graph/hostedbymap/hostedby_params.json | 38 ++++ .../hostedbymap/oozie_app/config-default.xml | 30 ++++ .../graph/hostedbymap/oozie_app/workflow.xml | 148 +++++++++++++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 127 +++++++++---- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 168 +++++++++--------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 +-- .../dhp/oa/graph/hostedbymap/datasource.json | 2 +- .../graph/hostedbymap/datasourceHostedByItem | 9 + .../dhp/oa/graph/hostedbymap/doajHostedByItem | 25 +++ .../oa/graph/hostedbymap/unibyHostedByItem | 29 +++ 24 files changed, 885 insertions(+), 459 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/{hostebymap/SparkPrepareHostedByMapData.scala => hostedbymap/SparkProduceHostedByMap.scala} (76%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b94422..58009bfcfc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f15..502cb370f8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -245,6 +245,8 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) + //TODO replace with the one above as soon as the new HBM will be used + //hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala deleted file mode 100644 index 561d2bbf45..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} -import org.apache.spark.sql.expressions.Aggregator - - -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} - -object Aggregators { - - - - def getId(s1:String, s2:String) : String = { - if (!s1.equals("")){ - return s1} - s2 - } - - - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java deleted file mode 100644 index b07e33cd19..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java +++ /dev/null @@ -1,15 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -public class Constants { - - - - public static final String OPENAIRE = "openaire"; - public static final String DOAJ = "doaj"; - public static final String UNIBI = "unibi"; - - - public static final String ISSN = "issn"; - public static final String EISSN = "eissn"; - public static final String ISSNL = "issnl"; -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java deleted file mode 100644 index d397886a32..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Optional; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - - URLConnection connection = new URL(fileURL).openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - - if(shouldReplace){ - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - - Class clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - - - writer.close(); - in.close(); - if(shouldReplace){ - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - - } - - - - -} - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java deleted file mode 100644 index fe1d14a763..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java +++ /dev/null @@ -1,53 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn ; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java deleted file mode 100644 index 309f74eea5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import com.opencsv.bean.CsvBindByName; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 0000000000..6a9346ed50 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,97 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (s1.startsWith("10|")){ + return s1} + s2 + } + + def getValue(s1:String, s2:String) : String = { + if(!s1.equals("")){ + return s1 + } + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { + val transformedData : Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator1) + .map{ + case (id:String , res:(String, HostedByItemType)) => res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) + override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { + return merge(b, a) + } + override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(b1._2.id.startsWith("10|")){ + return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction + override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + + override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java new file mode 100644 index 0000000000..b29877a48f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +public class Constants { + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java new file mode 100644 index 0000000000..9516cf6f76 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java @@ -0,0 +1,107 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("workingPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional + .ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + URLConnection connection = new URL(fileURL).openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if (shouldReplace) { + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + writer.close(); + in.close(); + if (shouldReplace) { + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index b66e972818..c44f2cbed7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,17 +1,23 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap +package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.schema.oaf.{Datasource} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import java.io.PrintWriter -object SparkPrepareHostedByMapData { +import org.apache.hadoop.io.compress.GzipCodec + + +object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) @@ -37,24 +43,32 @@ object SparkPrepareHostedByMapData { } } -// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { -// implicit val formats = DefaultFormats -// val serializedJSON:String = write(input) -// -// var hostedBy = new ListBuffer[String]() -// if(!input.issn.equals("")){ -// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" -// } -// if(!input.eissn.equals("")){ -// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" -// } -// if(!input.lissn.equals("")){ -// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" -// } -// -// hostedBy -// -// } + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + + Serialization.write(map) + + + } + + /** + * + def toHostedByMap(input: Map[String, HostedByItemType]): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + + + Serialization.write(input) + + + } + */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ @@ -166,11 +180,31 @@ object SparkPrepareHostedByMapData { } + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs= FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } + finally { + writer.close() + + } + + } + + + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -179,11 +213,10 @@ object SparkPrepareHostedByMapData { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val datasourcePath = parser.get("datasourcePath") val workingDirPath = parser.get("workingPath") - + val outputPath = parser.get("outputPath") implicit val formats = DefaultFormats @@ -191,29 +224,15 @@ object SparkPrepareHostedByMapData { logger.info("Getting the Datasources") - // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val dats : Dataset[HostedByItemType] = - oaHostedByDataset(spark, datasourcePath) + Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) - dats.flatMap(hbi => toList(hbi)) - .groupByKey(_._1) + .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) -// -// - -// - -// -// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") -// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) -// .filter(i => i != null)) -// .flatMap(toHostedByMap) -// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") -// -// } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java new file mode 100644 index 0000000000..ba804b939d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java new file mode 100644 index 0000000000..0927a136be --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json new file mode 100644 index 0000000000..fba048343b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"fu", + "paramLongName":"fileURL", + "paramDescription": "the url to download the csv file ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "sr", + "paramLongName": "replace", + "paramDescription": "true if the input file has to be cleaned before parsing", + "paramRequired": false + } +] + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json new file mode 100644 index 0000000000..9173b78aed --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json @@ -0,0 +1,38 @@ + +[ + + { + "paramName":"dsp", + "paramLongName":"datasourcePath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml new file mode 100644 index 0000000000..e5ec3d0aee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml new file mode 100644 index 0000000000..ecf6c3b316 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -0,0 +1,148 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --workingPath${workingDir}/unibi_gold + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${doajFileURL} + --workingPath${workingDir}/doaj + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --replacetrue + + + + + + + + + + + + yarn-cluster + Produce the new HostedByMap + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --datasourcePath${sourcePath}/datasource + --workingPath${workingDir} + --outputPath${outputPath} + --masteryarn-cluster + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 8e5657bfc1..2ed76a72ac 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,19 +1,14 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import java.sql.Timestamp - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.mutable.ListBuffer -import scala.io.Source +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ @@ -21,19 +16,14 @@ class TestPreprocess extends java.io.Serializable{ implicit val schema = Encoders.product[HostedByInfo] + def toHBIString (hbi:HostedByItemType): String = { + implicit val formats = DefaultFormats + + write(hbi) + } @Test def readDatasource():Unit = { - - - import org.apache.spark.sql.Encoders - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -45,25 +35,29 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) + assertEquals(9, ds.count) + assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readGold():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -76,23 +70,27 @@ class TestPreprocess extends java.io.Serializable{ val path = getClass.getResource("unibi_transformed.json").getPath - println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + assertEquals(29, ds.count) + assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readDoaj():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -104,14 +102,69 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + assertEquals(25, ds.count) + assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } + @Test + def testAggregator() : Unit = { + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + + + val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + assertEquals(106, tmp.count) + assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + + + val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) + + assertEquals(82, ds.count) + + assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) + + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|")) + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) + assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) + + val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + + hbmap.foreach(entry => println(entry)) + spark.close() + + } + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 01c70502c7..f886b275b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -1,111 +1,109 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.List; + +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.junit.jupiter.api.Test; -import java.io.*; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; -public class TestReadCSV { +public class TestReadCSV { - @Test - public void testCSVUnibi() throws FileNotFoundException { + @Test + public void testCSVUnibi() throws FileNotFoundException { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); + ObjectMapper mapper = new ObjectMapper(); - ObjectMapper mapper = new ObjectMapper(); + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + } + @Test + public void testCSVUrlUnibi() throws IOException { - } + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - @Test - public void testCSVUrlUnibi() throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> + ); + } - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } + @Test + public void testCSVUrlDOAJ() throws IOException { + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); - ); - } + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); - @Test - public void testCSVUrlDOAJ() throws IOException { + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - - ); - } + ); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c0..63f18a803a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 818aaa7167..4467c702f6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,6 +1,6 @@ {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem new file mode 100644 index 0000000000..093c57a9c7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem new file mode 100644 index 0000000000..effd0dd601 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem @@ -0,0 +1,25 @@ +{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true} +{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true} +{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true} +{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem new file mode 100644 index 0000000000..403ffdf5dc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem @@ -0,0 +1,29 @@ +{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true} +{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true} +{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true} +{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true} +{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true} +{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true} +{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true} +{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true} +{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true} +{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true} +{"id":"unibi","officialname":"˜The œjournal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true} +{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true} +{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true} +{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true} +{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true} +{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true} +{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true} +{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true} +{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true} +{"id":"unibi","officialname":"˜The œmuseum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true} +{"id":"unibi","officialname":"Bioactive compounds in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true} +{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true} +{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true} +{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true} +{"id":"unibi","officialname":"European journal of sustainable development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true} \ No newline at end of file From b1b0cc3f157df3998d2fe1392e6b5b7e76f75c12 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 13:54:56 +0200 Subject: [PATCH 043/162] fixed wrong package name --- .../eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 8c045fcfe7..2d97b51633 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -25,7 +25,7 @@ yarn-cluster cluster Create Action Set - eu.dnetlib.dhp.sx.provision.SparkCreateActionset + eu.dnetlib.dhp.actionmanager.scholix.SparkCreateActionset dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -52,7 +52,7 @@ yarn-cluster cluster Save Action Set - eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + eu.dnetlib.dhp.actionmanager.scholix.SparkSaveActionSet dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} From c53d106e80eca2af6456925130dd856bf5d33c49 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:56:37 +0200 Subject: [PATCH 044/162] [provision] lowercase relation filter --- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 7d53d35549..b3f7854924 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -81,6 +82,7 @@ public class PrepareRelationsJob { Set relationFilter = Optional .ofNullable(parser.get("relationFilter")) + .map(String::toLowerCase) .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) .orElse(new HashSet<>()); log.info("relationFilter: {}", relationFilter); @@ -130,7 +132,7 @@ public class PrepareRelationsJob { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(rel.getRelClass()) == false); + .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); JavaRDD pruned = pruneRels( pruneRels( From 26af0320d0be2f88187b8a837f357b6634bd315b Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 30 Jul 2021 00:31:33 +0300 Subject: [PATCH 045/162] added the sprint 2 indicators in monitor db --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 7442b7c10c..5da0283047 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -116,6 +116,13 @@ compute stats TARGET.indi_pub_doi_from_crossref; create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_gold_oa; +create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; +create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; +create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; +create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; +create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; +create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 6358f92c3a5301bd25ec00259453490d19e318aa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 30 Jul 2021 08:54:25 +0200 Subject: [PATCH 046/162] added sleep to solve problem of lost request of creating index --- .../java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index f96a64a27b..ffeb0995d1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -71,6 +71,9 @@ public class DropAndCreateESIndex { log.info(STATUS_CODE_TEXT, response.getStatusLine()); } + log.info("Sleeping 60 seconds to avoid to lost the creation of index request"); + Thread.sleep(60000); + try (CloseableHttpClient client = HttpClients.createDefault()) { final String summaryConf = IOUtils From 19620eed46d8f94474ebf27b21444f4c901080f7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:09:32 +0200 Subject: [PATCH 047/162] applying PR#131, Patch the identifiers (source/target) in the relations, refinements --- .../graph/raw/PatchRelationsApplication.java | 180 ++++++++++-------- .../graph/raw/common/RelationIdMapping.java | 29 +-- .../raw/PatchRelationApplicationTest.java | 115 +++++++++++ .../dnetlib/dhp/oa/graph/raw/id_mapping.json | 5 + .../dhp/oa/graph/raw/relations_to_patch.json | 6 + 5 files changed, 237 insertions(+), 98 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index c2bcf69f09..5523863ff0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -1,10 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -15,101 +17,111 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; -import java.io.FileNotFoundException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class PatchRelationsApplication { - private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Optional.ofNullable( - PatchRelationsApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) - .orElseThrow(FileNotFoundException::new) - )); - parser.parseArgument(args); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional + .ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new))); + parser.parseArgument(args); - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphBasePath = parser.get("graphBasePath"); - log.info("graphBasePath: {}", graphBasePath); + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); - final String workingDir = parser.get("workingDir"); - log.info("workingDir: {}", workingDir); + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); - final String idMappingPath = parser.get("idMappingPath"); - log.info("idMappingPath: {}", idMappingPath); + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); - final SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); - } + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } - /** - * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the - * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. - * - * @param spark the SparkSession - * @param graphBasePath base graph path providing the set of relations to patch - * @param workingDir intermediate storage location - * @param idMappingPath dataset providing the old -> new identifier mapping - */ - private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, + final String idMappingPath) { - final String relationPath = graphBasePath + "/relation"; + final String relationPath = graphBasePath + "/relation"; - final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); - final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); - rels - .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setSource); - return r; - }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setTarget); - return r; - }, Encoders.bean(Relation.class)) - .map( - (MapFunction) OBJECT_MAPPER::writeValueAsString, - Encoders.STRING()) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(workingDir); + log.info("relations: {}", rels.count()); + log.info("idMapping: {}", idMapping.count()); - spark.read().textFile(workingDir) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(relationPath); - } + final Dataset bySource = rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)); + bySource + .joinWith(idMapping, bySource.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark + .read() + .textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java index f251da8c33..d5852ab709 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -1,24 +1,25 @@ + package eu.dnetlib.dhp.oa.graph.raw.common; public class RelationIdMapping { - private String oldId; + private String oldId; - private String newId; + private String newId; - public String getOldId() { - return oldId; - } + public String getOldId() { + return oldId; + } - public void setOldId(final String oldId) { - this.oldId = oldId; - } + public void setOldId(final String oldId) { + this.oldId = oldId; + } - public String getNewId() { - return newId; - } + public String getNewId() { + return newId; + } - public void setNewId(final String newId) { - this.newId = newId; - } + public void setNewId(final String newId) { + this.newId = newId; + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java new file mode 100644 index 0000000000..3fd3654161 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -0,0 +1,115 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class PatchRelationApplicationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ID_MAPPING_PATH = "map/id_mapping.json"; + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PatchRelationApplicationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), + workingDir.resolve(ID_MAPPING_PATH).toFile()); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), + workingDir.resolve("graphBasePath/relation/rels.json").toFile()); + + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testPatchRelationApplication() throws Exception { + + final String graphBasePath = workingDir.toString() + "/graphBasePath"; + PatchRelationsApplication.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphBasePath", graphBasePath, + "-workingDir", workingDir.toString() + "/workingDir", + "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + }); + + final List rels = spark + .read() + .textFile(graphBasePath + "/relation") + .map( + (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), + Encoders.bean(Relation.class)) + .collectAsList(); + + assertEquals(6, rels.size()); + + assertEquals(0, getCount(rels, "1a"), "should be patched to 1b"); + assertEquals(0, getCount(rels, "2a"), "should be patched to 2b"); + + assertEquals(2, getCount(rels, "10a"), "not included in patching"); + assertEquals(2, getCount(rels, "20a"), "not included in patching"); + + assertEquals(2, getCount(rels, "15a"), "not included in patching"); + assertEquals(2, getCount(rels, "25a"), "not included in patching"); + + assertEquals(2, getCount(rels, "1b"), "patched from 1a"); + assertEquals(2, getCount(rels, "2b"), "patched from 2a"); + } + + private long getCount(List rels, final String id) { + return rels.stream().filter(r -> r.getSource().equals(id) || r.getTarget().equals(id)).count(); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json new file mode 100644 index 0000000000..640d042b1d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json @@ -0,0 +1,5 @@ +{"oldId": "1a", "newId": "1b"} +{"oldId": "2a", "newId": "2b"} +{"oldId": "3a", "newId": "3b"} +{"oldId": "4a", "newId": "4b"} +{"oldId": "5a", "newId": "5b"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json new file mode 100644 index 0000000000..31755c53d5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json @@ -0,0 +1,6 @@ +{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From e244f73165a6b90a9b36686a0180685128695f8e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:54:13 +0200 Subject: [PATCH 048/162] Update 'README.md' --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39d4d98e4a..0a0bd82ab5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # dnet-hadoop -Dnet-hadoop is a tool for \ No newline at end of file +Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning. \ No newline at end of file From 613bd3bde0076113dd37a64d2efbc23ec8e9ec61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:54:45 +0200 Subject: [PATCH 049/162] Hosted By Map - refactor of the first attemp to prepare a new hosted by map dependent on the datasource in the graph and on two external sources: the gold list from unibi ad the doaj list of open access journal. Both the lists are downloaded from provided url parameter --- .../graph/hostedbymap/SparkProduceHostedByMap.scala | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index c44f2cbed7..ca5e82a4a1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -55,20 +55,7 @@ object SparkProduceHostedByMap { } - /** - * - def toHostedByMap(input: Map[String, HostedByItemType]): String = { - import org.json4s.jackson.Serialization - implicit val formats = org.json4s.DefaultFormats - - - - Serialization.write(input) - - - } - */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ From d8b9b0553b7c89b7831625dd3804384c9dab4e03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:55:39 +0200 Subject: [PATCH 050/162] Hosted By Map - model classes to store the intermediate information to be used to apply the hosted by map --- .../hostedbymap/model/DatasourceInfo.java | 72 +++++++++++++++++++ .../graph/hostedbymap/model/EntityInfo.java | 69 ++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java new file mode 100644 index 0000000000..f5ac8f70c5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class DatasourceInfo implements Serializable { + private String id; + private String officialname; + private String issn; + private String eissn; + private String lissn; + private Boolean openAccess; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getOfficialname() { + return officialname; + } + + public void setOfficialname(String officialname) { + this.officialname = officialname; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getLissn() { + return lissn; + } + + public void setLissn(String lissn) { + this.lissn = lissn; + } + + public Boolean getOpenAccess() { + return openAccess; + } + + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; + } + + public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ + DatasourceInfo di = new DatasourceInfo(); + di.id = id; + di.officialname = officialname; + di.issn = (issn != null) ? issn : "" ; + di.eissn = (eissn != null) ? eissn:""; + di.lissn = (lissn != null) ? lissn : ""; + di.openAccess = false; + + return di; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java new file mode 100644 index 0000000000..7bf9d05982 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -0,0 +1,69 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class EntityInfo implements Serializable { + private String id; + private String journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getJournal_id() { + return journal_id; + } + + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Boolean getOpenaccess() { + return openaccess; + } + + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } + + public String getHb_id() { + return hb_id; + } + + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } + + public static EntityInfo newInstance(String id, String j_id, String name){ + return newInstance(id, j_id, name, false); + + } + + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; + + } +} From 7c6ea2f4c700c3ce59dd78e5e4eed4ea8c198c5e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:56:27 +0200 Subject: [PATCH 051/162] Hosted By Map - first attempt for the creation of intermedia information to be used to applu the hosted by map on the graph entities --- .../oa/graph/hostedbymap/Aggregators.scala | 113 +++++++++----- .../SparkPrepareHostedByInfoToApply.scala | 147 ++++++++++++++++++ 2 files changed, 225 insertions(+), 35 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 6a9346ed50..8077efe302 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,5 +1,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator @@ -25,43 +26,10 @@ object Aggregators { } - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { val transformedData : Dataset[(String, HostedByItemType)] = df .groupByKey(_._1)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator1) + .agg(Aggregators.hostedByAggregator) .map{ case (id:String , res:(String, HostedByItemType)) => res }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) @@ -69,7 +37,7 @@ object Aggregators { transformedData } - val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { return merge(b, a) @@ -94,4 +62,79 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn + def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByToDSAggregator) + .map{ + case (id:String , res: HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.id.equals("")){ + return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) + + } + return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + } + b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getId)(Encoders.STRING) + .agg(Aggregators.resultToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala new file mode 100644 index 0000000000..03ab09d8e8 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -0,0 +1,147 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} + +object SparkPrepareHostedByInfoToApply { + + + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.kryo[DatasourceInfo] + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + + def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { + var lst:List[EntityInfo] = List() + + + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst + } + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst + } + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst + } + lst + } + + def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + .map(r => mapper.readValue(r, classOf[Publication])) + + dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + + } + + + + def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, + d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) + + } + def toHostedByItem(input:String): HostedByItemType = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + c.values.head + } + + def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { + var lst : List[EntityInfo] = List() + if (input.getEissn != null && !input.getEissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst + } + + lst + } + + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val hostedByMapPath = parser.get("hostedByMapPath") + + + implicit val formats = DefaultFormats + + + logger.info("Getting the Datasources") + + import spark.implicits._ + + //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} + val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") + + //STEP2: leggere la hostedbymap e raggruppare per datasource id + val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + + //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no + //ed esplodere l'info della datasource per ogni journal id diverso da nullo + val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, + datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + dsi.setOpenAccess(t2._2.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + + //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + + //STEP5: join di join con resultInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry + Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + + + + } + +} From 1695d45bd41a772319d357642df536cd8634faf2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:57:01 +0200 Subject: [PATCH 052/162] Hosted By Map - Test class to verify the preparation of the intermediate information --- .../oa/graph/hostedbymap/TestPrepare.scala | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala new file mode 100644 index 0000000000..f01e4f59fd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestPrepare extends java.io.Serializable{ + + @Test + def testPrepareDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) + + val mapper :ObjectMapper = new ObjectMapper() + + assertEquals(9, ds.count) + + assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getOfficialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) + ds.foreach(e => println(mapper.writeValueAsString(e))) + spark.close() + } + + @Test + def testPrepareHostedByMap():Unit = { + + } + + +} From 117c3d5c67876c323bf3e14cb4233110a0e6945b Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 2 Aug 2021 12:15:58 +0300 Subject: [PATCH 053/162] fixed a typo --- .../stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index f1ebf0d87f..020787039f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -108,7 +108,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA END) AS NonOpenAccess FROM software s join result_organization ro on s.id=ro.id - join SOURCER.organization o on o.id=ro.organization + join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 group by year, country) tmp; From fd55c77d979dadb051ef37250f0c0f52022de757 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 13:48:42 +0200 Subject: [PATCH 054/162] updated dependency dhp-schemas:2.7.15 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fc4a8a21b9..433c880930 100644 --- a/pom.xml +++ b/pom.xml @@ -741,7 +741,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15] [4.0.3] [6.0.5] [3.1.6] From e826aae84811ec1143ddd7134ef11879846f043b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 14:28:59 +0200 Subject: [PATCH 055/162] using constants from ModelConstants --- .../eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala | 12 ++++++------ .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala index a19c6fb12e..90b65c8f70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala @@ -199,7 +199,7 @@ object BioDBToOAF { d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) } val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) - .map(date => OafMapperUtils.structuredProperty(date.date, "UNKNOWN", "UNKNOWN", ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) + .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) if (relevant_dates != null && relevant_dates.nonEmpty) d.setRelevantdate(relevant_dates.asJava) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) @@ -218,12 +218,12 @@ object BioDBToOAF { if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) rel.getCollectedfrom List(d, rel) } else if (references_doi != null && references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) List(d, rel) } else @@ -243,7 +243,7 @@ object BioDBToOAF { rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) rel.setDataInfo(DATA_INFO) - rel.setRelType("resultResult") + rel.setRelType(ModelConstants.RESULT_RESULT) rel.setSubRelType(subRelType) rel.setRelClass(relClass) @@ -263,7 +263,7 @@ object BioDBToOAF { def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date:String): Relation = { - createRelation(pid,pidType,sourceId,collectedFrom, "supplement","IsSupplementTo", date) + createRelation(pid,pidType,sourceId,collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) } @@ -392,6 +392,6 @@ object BioDBToOAF { i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date))) + List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date))) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index ae4a720626..9a49deebca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -16,7 +16,7 @@ object PubMedToOaf { ) def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { - val result_typologies = getVocabularyTerm("dnet:result_typologies", vocabularies, cobjQualifier.getClassid) + val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { case "dataset" => new Dataset case "publication" => new Publication @@ -68,11 +68,11 @@ object PubMedToOaf { //else We have to find a terms that match the vocabulary otherwise we discard it val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) if (ja.isDefined) { - val cojbCategory = getVocabularyTerm("dnet:publication_resource", vocabularies, ja.get.getValue) + val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) i.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala - .map(s => getVocabularyTerm("dnet:publication_resource", vocabularies, s.getValue)) + .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .find(q => q != null) if (i_type.isDefined) i.setInstancetype(i_type.get) @@ -112,7 +112,7 @@ object PubMedToOaf { if (article.getLanguage != null) { - val term = vocabularies.getSynonymAsQualifier("dnet:languages", article.getLanguage) + val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) if (term != null) result.setLanguage(term) } From ff1ce75e33e0f7cb2293c50e40b7ff83dd89bedb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:32:59 +0200 Subject: [PATCH 056/162] Hosted By Map - modification in the code to prepare the info needed to apply the HostedByMap. There is no need to join datasources with the hbm: all the information needed is in the hosted by map already --- .../SparkPrepareHostedByInfoToApply.scala | 104 +++++++++--------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 03ab09d8e8..7395b24506 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} -import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} + +import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} @@ -12,11 +13,13 @@ import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} + + object SparkPrepareHostedByInfoToApply { - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.kryo[DatasourceInfo] - implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { var lst:List[EntityInfo] = List() @@ -47,25 +50,12 @@ object SparkPrepareHostedByInfoToApply { } - - def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { - implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - - val mapper = new ObjectMapper() - - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[Datasource])) - - dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, - d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) - - } - def toHostedByItem(input:String): HostedByItemType = { + def toEntityInfo(input:String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] - c.values.head + toEntityItem(c.keys.head, c.values.head) } def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { @@ -73,10 +63,49 @@ object SparkPrepareHostedByInfoToApply { if (input.getEissn != null && !input.getEissn.equals("")){ lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst } + if (input.getLissn != null && !input.getLissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst + } + if (input.getIssn != null && !input.getIssn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst + } lst } + def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { + left.joinWith(right, + left.col("id").equalTo(right.col("id")), "left") + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + val hbi : HostedByItemType = t2._2 + dsi.setOpenAccess(hbi.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + } + + def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + + EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) + + } + + def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })) + } + def main(args: Array[String]): Unit = { @@ -105,43 +134,20 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} - val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") - //STEP2: leggere la hostedbymap e raggruppare per datasource id - val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + //STEP1: leggere la hostedbymap e trasformarla in entity info + val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no - //ed esplodere l'info della datasource per ogni journal id diverso da nullo - val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, - datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - dsi.setOpenAccess(t2._2.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - - //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: creare la mappa publication id issn, eissn, lissn esplosa val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") - //STEP5: join di join con resultInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry - Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") - .map(t2 => { - val res: EntityInfo = t2._1 - if(t2._2 != null ){ - val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) - res.setName(ds.getName) - } - res - })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) - + //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + joinResHBM(resultInfoDataset, hostedByInfo) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) } + } From 72df8f92320c7d0ddcecece72a8e3502f5b03022 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:34:44 +0200 Subject: [PATCH 057/162] Hosted By Map - removed the aggregator for the datasource (it is no more needed) and added a new aggregator for the results. Changed also the hostedBYMap aggregator --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8077efe302..af4ac45077 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -62,42 +62,7 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn - def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByToDSAggregator) - .map{ - case (id:String , res: HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - transformedData - } - - def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - if(!b1.id.equals("")){ - return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) - - } - return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ @@ -115,6 +80,7 @@ object Aggregators { } if(!b1.getHb_id.equals("")){ b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + return b1 } b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) b2 From 1e859706a3516b8103342cbe875004a6c112d35b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:23 +0200 Subject: [PATCH 058/162] Hosted By Map - Classes to apply the HBM to results and datasources --- .../SparkApplyHostedByMapToDatasource.scala | 72 +++++++++++++++ .../SparkApplyHostedByMapToResult.scala | 88 +++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala new file mode 100644 index 0000000000..4ecea63f32 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +object SparkApplyHostedByMapToDatasource { + + def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { + dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + .map(t2 => { + val d: Datasource = t2._1 + if (t2._2 != null) { + if (d.getOpenairecompatibility.getClassid.equals(ModelConstants.UNKNOWN)) { + d.getOpenairecompatibility.setClassid("hostedBy") + d.getOpenairecompatibility.setClassname("collected from a compatible aggregator") + } + } + d + })(Encoders.bean((classOf[Datasource]))) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + .map(r => mapper.readValue(r, classOf[Datasource])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + + + //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario + + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala new file mode 100644 index 0000000000..37afc319a2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + +//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance +// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + + +object SparkApplyHostedByMapToResult { + + def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { + pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + .map(t2 => { + val p: Publication = t2._1 + if (t2._2 != null) { + val ei: EntityInfo = t2._2 + val i = p.getInstance().asScala + if (i.size == 1) { + val inst: Instance = i(0) + + inst.getHostedby.setKey(ei.getHb_id) + inst.getHostedby.setValue(ei.getName) + if (ei.getOpenaccess) { + inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + } + + } + } + p + })(Encoders.bean(classOf[Publication])) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + .map(r => mapper.readValue(r, classOf[Publication])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance + // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + + + + } + + +} From 90e91486e2cc63a51e51f5e2412ad3ff7c49ba19 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:52 +0200 Subject: [PATCH 059/162] Hosted By Map - test class to verify each step in the preparation process --- .../oa/graph/hostedbymap/TestPrepare.scala | 142 +++++++++++++++--- 1 file changed, 124 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index f01e4f59fd..efd598fef7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -1,16 +1,31 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test class TestPrepare extends java.io.Serializable{ + def getString(input:HostedByItemType):String = { + + import org.json4s.jackson.Serialization.write + implicit val formats = DefaultFormats + + write(input) + } + + @Test - def testPrepareDatasource():Unit = { + def testHostedByMaptoEntityInfo() : Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -20,34 +35,125 @@ class TestPrepare extends java.io.Serializable{ .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("hostedbymap.json").getPath - val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) - val mapper :ObjectMapper = new ObjectMapper() + import spark.implicits._ - assertEquals(9, ds.count) + val mapper:ObjectMapper = new ObjectMapper() - assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) - assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) - assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) - assertTrue(ds.filter(hbi => hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getOfficialname.equals("Thémata")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) - ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(20, ds.count) spark.close() } @Test - def testPrepareHostedByMap():Unit = { + def testPublicationtoEntityInfo() : Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("publication.json").getPath + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(2, ds.count) + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + + spark.close() + } + + @Test + def testJoinResHBM (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("0001-396X", ei.getJournal_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(!ei.getOpenaccess) + + spark.close() + } + + @Test + def testJoinResHBM2 (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication2").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(ei.getOpenaccess) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + spark.close() } + } From ee7ccb98dc716313b620dc47d02b1bb277632246 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:36:18 +0200 Subject: [PATCH 060/162] Hosted By Map - test class to verify the application of the hbm to results and datasource --- .../dhp/oa/graph/hostedbymap/TestApply.scala | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala new file mode 100644 index 0000000000..b78eb978bf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -0,0 +1,134 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestApply extends java.io.Serializable{ + + @Test + def testApplyOnResult (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("publication.json").getPath + val hbm = getClass.getResource("preparedInfo.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + + val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(13, pub_ds.count()) + + val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + + assertEquals(13, ds.count) + + val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(13, temp.count()) + temp.foreach(t2 => { + val pb : Publication = t2._1 + val pa : Publication = t2._2 + assertEquals(1, pa.getInstance().size()) + assertEquals(1, pb.getInstance().size()) + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + + + assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) + assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + + }else{ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) + + } + }) + + spark.close() + } + + + @Test + def testApplyOnDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val dats = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("preparedInfo2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + + val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(10, dats_ds.count()) + + val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + + assertEquals(10, ds.count) + + val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(10, temp.count()) + temp.foreach(t2 => { + val pb : Datasource = t2._1 + val pa : Datasource = t2._2 + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) + assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) + + assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) + + + }else{ + assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + + } + }) + + spark.close() + + } + +} From 17292c6641439f175b41e665422dea25a19d44f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:37:08 +0200 Subject: [PATCH 061/162] Hosted By Map - resources for testing purposes --- .../dhp/oa/graph/hostedbymap/datasource.json | 4 ++-- .../oa/graph/hostedbymap/datasourceinfo.json | 9 +++++++++ .../graph/hostedbymap/hostedbyitemtype.json | 17 ++++++++++++++++ .../dhp/oa/graph/hostedbymap/hostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap2.json | 20 +++++++++++++++++++ .../graph/hostedbymap/iteminfofrompublication | 2 ++ .../hostedbymap/iteminfofrompublication2 | 2 ++ .../oa/graph/hostedbymap/preparedInfo.json | 1 + .../oa/graph/hostedbymap/preparedInfo2.json | 2 ++ .../dhp/oa/graph/hostedbymap/publication.json | 13 ++++++++++++ 11 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 4467c702f6..5eb41bff5d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,5 +1,5 @@ -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"UNKNOWN","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"native","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json new file mode 100644 index 0000000000..fd51cc2f28 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2077-3757","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json new file mode 100644 index 0000000000..6e4c11f494 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json @@ -0,0 +1,17 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json new file mode 100644 index 0000000000..188380bc65 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json @@ -0,0 +1,20 @@ +{"0001-396X":{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false}} +{"0015-7899":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":false}} +{"1434-0860":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true}} +{"0022-0116":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"1573-3564":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"0022-0493":{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false}} +{"0022-4715":{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false}} +{"1543-8325":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0023-7639":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0033-3298":{"id":"10|issn___print::91899e3872351895467856daeb798f63","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false}} +{"0033-569X":{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false}} +{"0034-6691":{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false}} +{"0035-1776":{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false}} +{"0037-699X":{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true}} +{"0037-9697":{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false}} +{"0045-6713":{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false}} +{"0047-4800":{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false}} +{"0068-1024":{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false}} +{"0083-6656":{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false}} +{"0090-502X":{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json new file mode 100644 index 0000000000..dfb95816ea --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json new file mode 100644 index 0000000000..3d2f2e0050 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication new file mode 100644 index 0000000000..6ee4f94be7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 new file mode 100644 index 0000000000..59bd32d4b0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json new file mode 100644 index 0000000000..d9c2010821 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -0,0 +1 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json new file mode 100644 index 0000000000..6e1cce3b6e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -0,0 +1,2 @@ +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json new file mode 100644 index 0000000000..602bedbdad --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -0,0 +1,13 @@ +{"author":[{"fullname":"Shu, L.","name":"L.","pid":[],"rank":1,"surname":"Shu"},{"fullname":"Zhang, Y.","name":"Y.","pid":[],"rank":2,"surname":"Zhang"},{"fullname":"Chen, X.","name":"X.","pid":[],"rank":3,"surname":"Chen"},{"fullname":"Wang, S.","name":"S.","pid":[],"rank":4,"surname":"Wang"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"dateofcollection":"2021-07-10T12:27:03.175Z","dateoftransformation":"2021-07-10T12:38:22.255Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s11036-015-0594-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813784431,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-02-26T11:19:47Z","harvestDate":"2021-07-10T12:27:03.175Z","identifier":"oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Shu , L , Zhang , Y , Chen , X & Wang , S 2015 , ' Editorial for Special Issue on Industrial Networks and Intelligent Systems ' , Mobile Networks and Applications , vol. 20 , no. 2 , pp. 121-123 . https://doi.org/10.1007/s11036-015-0594-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial for Special Issue on Industrial Networks and Intelligent Systems"}]} +{"author":[{"fullname":"Hemmilä, Kari","name":"Kari","pid":[],"rank":1,"surname":"Hemmilä"},{"fullname":"Saarni, Risto","name":"Risto","pid":[],"rank":2,"surname":"Saarni"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"dateofcollection":"2021-07-10T12:31:21.927Z","dateoftransformation":"2021-07-10T13:17:02.345Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Millainen on onnistunut ikkunaremontti? Mikä on asukkaan kannalta paras vaihtoehto? Mitä seikkoja ikkunaremontissa on erityisesti otettava huomioon? Kirjan tekijät diplomi-insinööri Kari Hemmilä ja tekniikan lisensiaatti Risto Saarni ovat olleet mukana monissa ikkunoita ja niiden remontointia koskevissa tutkimusprojekteissa. Näiden lisäksi kirja perustuu lukuisista ikkunaremonteista saatuihin kokemuksiin, remonttien lähtökohtiin, toteutukseen sekä toimivuuden ja käyttökokemusten seurantaan. Onnistunut ikkunaremontti on sellainen, johon ollaan tyytyväisiä vielä vuosien päästä. Sen perustana on perehtyminen nykytilaan, huolellinen toteutus ja kunnossapito, joita tässä kirjassa tuodaan esille monesta näkökulmasta."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813851542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-08-11T09:10:31Z","harvestDate":"2021-07-10T12:31:21.927Z","identifier":"oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Rakennustieto oy"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hemmilä , K & Saarni , R 2002 , Ikkunaremontti . Rakennustieto oy , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ikkunaremontti"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Window renovation"}]} +{"author":[{"fullname":"Pavlic, Davor","name":"Davor","pid":[],"rank":1,"surname":"Pavlic"},{"fullname":"Pulkkinen, Antti","name":"Antti","pid":[],"rank":2,"surname":"Pulkkinen"},{"fullname":"Riitahuhta, Asko","name":"Asko","pid":[],"rank":3,"surname":"Riitahuhta"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-07-10T12:34:48.501Z","dateoftransformation":"2021-07-10T13:26:46.605Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813869068,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:03:33Z","harvestDate":"2021-07-10T12:34:48.501Z","identifier":"oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Design Society"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pavlic , D , Pulkkinen , A & Riitahuhta , A 2006 , A conceptual framework of product family architecture . in DS 47: Proceedings of NordDesign 2006 Conference, Rejkjavik, Iceland, 16.-18.08.2006 . Design Society , pp. 212-222 . < https://www.designsociety.org/publication/24280/A+Conceptual+Framework+of+Product+Family+Architecture >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A conceptual framework of product family architecture"}]} +{"author":[{"fullname":"Carpen, Leena","name":"Leena","pid":[],"rank":1,"surname":"Carpen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.079Z","dateoftransformation":"2021-07-10T13:36:20.24Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f27e75d-365e-4810-be48-ef60378d8279"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813880572,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-05-17T10:37:19Z","harvestDate":"2021-07-10T12:27:29.079Z","identifier":"oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Carpen , L 1999 , ' Case: microbial induced corrosion in paper machines ' , RenhetsTeknik , no. 2 , 16 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Case: microbial induced corrosion in paper machines"}]} +{"author":[{"fullname":"Tuomola, Tuomas","name":"Tuomas","pid":[],"rank":1,"surname":"Tuomola"},{"fullname":"Siitonen, Veijo","name":"Veijo","pid":[],"rank":2,"surname":"Siitonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"dateofcollection":"2021-07-10T12:30:02.598Z","dateoftransformation":"2021-07-10T13:50:33.549Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/56ed9ab2-3b3c-4829-a63b-52311969dd30"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813901871,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-27T09:19:12Z","harvestDate":"2021-07-10T12:30:02.598Z","identifier":"oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tuomola , T & Siitonen , V 1970 , Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968 . Valtion teknillinen tutkimuslaitos: Lämpöteknillinen laboratorio. Tiedonanto , no. 4 , VTT Technical Research Centre of Finland , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968"}]} +{"author":[{"fullname":"Paakkari, Jussi","name":"Jussi","pid":[],"rank":1,"surname":"Paakkari"},{"fullname":"Ailisto, Heikki","name":"Heikki","pid":[],"rank":2,"surname":"Ailisto"},{"fullname":"Niskala, Matti","name":"Matti","pid":[],"rank":3,"surname":"Niskala"},{"fullname":"Mäkäräinen, Masa","name":"Masa","pid":[],"rank":4,"surname":"Mäkäräinen"},{"fullname":"Väinämö, Kauko","name":"Kauko","pid":[],"rank":5,"surname":"Väinämö"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.082Z","dateoftransformation":"2021-07-10T13:56:12.148Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/33cedb13-48f3-4326-b5c3-18d248374378"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813911302,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-29T02:07:50Z","harvestDate":"2021-07-10T12:27:29.082Z","identifier":"oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Research and Development Centre of Kajaani"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Paakkari , J , Ailisto , H , Niskala , M , Mäkäräinen , M & Väinämö , K 1999 , Machine vision guided waterjet cutting . in 3rd International Symposium on Optics in Engineering. Kajaani, FI, 19 - 21 Jan. 1999 . Research and Development Centre of Kajaani , Kajaani ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Machine vision guided waterjet cutting"}]} +{"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} +{"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} +{"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . < https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} +{"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} +{"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 327cddde336a2a590efeba3a76febb1cec36711b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:44:13 +0200 Subject: [PATCH 062/162] Hosted By Map - refactoring --- .../graph/hostedbymap/model/EntityInfo.java | 97 +++++++++---------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 7bf9d05982..1facaa7923 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -1,69 +1,68 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; public class EntityInfo implements Serializable { - private String id; - private String journal_id; - private String name; - private Boolean openaccess; - private String hb_id; + private String id; + private String journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } - public String getId() { - return id; - } + public String getJournal_id() { + return journal_id; + } - public void setId(String id) { - this.id = id; - } + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } - public String getJournal_id() { - return journal_id; - } + public String getName() { + return name; + } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; - } + public void setName(String name) { + this.name = name; + } - public String getName() { - return name; - } + public Boolean getOpenaccess() { + return openaccess; + } - public void setName(String name) { - this.name = name; - } + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } - public Boolean getOpenaccess() { - return openaccess; - } + public String getHb_id() { + return hb_id; + } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; - } + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } - public String getHb_id() { - return hb_id; - } + public static EntityInfo newInstance(String id, String j_id, String name) { + return newInstance(id, j_id, name, false); - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } + } - public static EntityInfo newInstance(String id, String j_id, String name){ - return newInstance(id, j_id, name, false); + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - - } + } } From 461b8a29a0e52d92cb12aaec7e481844f9124c51 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:46:51 +0200 Subject: [PATCH 063/162] removed not needed class --- .../hostedbymap/model/DatasourceInfo.java | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java deleted file mode 100644 index f5ac8f70c5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java +++ /dev/null @@ -1,72 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap.model; - -import java.io.Serializable; - -public class DatasourceInfo implements Serializable { - private String id; - private String officialname; - private String issn; - private String eissn; - private String lissn; - private Boolean openAccess; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getOfficialname() { - return officialname; - } - - public void setOfficialname(String officialname) { - this.officialname = officialname; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getLissn() { - return lissn; - } - - public void setLissn(String lissn) { - this.lissn = lissn; - } - - public Boolean getOpenAccess() { - return openAccess; - } - - public void setOpenAccess(Boolean openAccess) { - this.openAccess = openAccess; - } - - public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ - DatasourceInfo di = new DatasourceInfo(); - di.id = id; - di.officialname = officialname; - di.issn = (issn != null) ? issn : "" ; - di.eissn = (eissn != null) ? eissn:""; - di.lissn = (lissn != null) ? lissn : ""; - di.openAccess = false; - - return di; - } -} From 9831725073e21ed89e014072c92cc0fc57642cee Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 11:02:17 +0200 Subject: [PATCH 064/162] Hosted By Map - remove from workflow a step not needed. The hbm will be take care also of the integration of the unibi list of gold openaccess journals --- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 4de1a21854..de433b038c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,7 +63,6 @@ - ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -77,18 +76,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- - - eu.dnetlib.doiboost.GetCSV - --hdfsNameNode${nameNode} - --fileURL${unibiGoldIssnFileURL} - --hdfsPath${hdfsPath} - --classForNameeu.dnetlib.doiboost.UnibiGoldModel - - - - - From a7bf314fd2a8c0fb1075eb2738023e7d802d0364 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:13:30 +0200 Subject: [PATCH 065/162] Hosted By Map - added new aggregator to get just one result per datasource id --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index af4ac45077..8198b197aa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -103,4 +103,40 @@ object Aggregators { transformedData } + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + return b1 + } + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + + def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getHb_id)(Encoders.STRING) + .agg(Aggregators.datasourceToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } } From 3fc820203bcd76bf4aa81baa027cd76bee7c0460 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 4 Aug 2021 10:13:48 +0200 Subject: [PATCH 066/162] fixed wrong test file --- .../eu/dnetlib/dhp/sx/graph/oaf_to_summary | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary index 4d581044a2..e1fd758b41 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary @@ -1,20 +1,10 @@ -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","target":"50|doi_________::4af011e641e0ba286660fd24a3f603b7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-01-01","dataInfo":null}]} -{"id":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","localIdentifier":[{"identifier":"10.1111/1346-8138.14162","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/1346-8138.14162/fullpdf"}],"typology":"publication","subType":"Article","title":["Guideline of SSc","Diagnostic criteria, severity classification and guidelines of systemic sclerosis"],"author":["Yoshihide Asano","Masatoshi Jinnin","Yasushi Kawaguchi","Masataka Kuwana","Daisuke Goto","Shinichi Sato","Kazuhiko Takehara","Masaru Hatano","Manabu Fujimoto","Naoki Mugii","Hironobu Ihn"],"date":["2018-04-23","2018-01-01"],"subject":null,"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","target":"50|doi_________::7e79063f205480e61ee7fdcf7ab03bad","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2004-11-01","dataInfo":null}]} -{"id":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","localIdentifier":[{"identifier":"10.1002/ajmg.a.30270","schema":"doi","url":"https://onlinelibrary.wiley.com/doi/full/10.1002/ajmg.a.30270"}],"typology":"publication","subType":"Article","title":["Clinical variability in a Noonan syndrome family with a newPTPN11 gene mutation"],"author":["D�bora Romeo Bertola","Alexandre C. Pereira","Paulo S.L. de Oliveira","Chong A. Kim","Jos� Eduardo Krieger"],"date":["2004-09-21T23:19:41Z","2004-11-01"],"subject":[{"scheme":"keywords","value":"Genetics(clinical)"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","target":"50|doi_________::edb21431e0271061e0dddc248300708a","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2010-08-16","dataInfo":null}]} -{"id":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","localIdentifier":[{"identifier":"10.1111/j.1440-1843.2010.01819.x","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/j.1440-1843.2010.01819.x/fullpdf"}],"typology":"publication","subType":"Article","title":["P. aeruginosa: host defence in the lung","Pseudomonas aeruginosa: Host defence in lung diseases"],"author":["Bryan J. WILLIAMS","Joanne DEHNBOSTEL","Timothy S. BLACKWELL"],"date":["2010-08-16"],"subject":[{"scheme":"keywords","value":"Pulmonary and Respiratory Medicine"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f441c6243fd6ae381c520b42349b769","target":"50|doi_________::44ebec98169daae57c106eb1a1072aae","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-07-12","dataInfo":null}]} -{"id":"50|doi_________::3f441c6243fd6ae381c520b42349b769","localIdentifier":[{"identifier":"10.1007/s11901-018-0414-x","schema":"doi","url":"http://link.springer.com/article/10.1007/s11901-018-0414-x/fulltext.html"}],"typology":"publication","subType":"Article","title":["DILI Associated with Skin Reactions"],"author":["Sahand Rahnama-Moghadam","Hans L. Tillmann"],"date":["2018-07-12"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::ff99f7ec03946fa4c8f413d59f75a547","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::f767374d588a8d51de0f129261daa5a7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","target":"50|doi_________::e2d40a313240d3eb979a3172103a4d7f","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2006-11-17","dataInfo":null}]} -{"id":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","localIdentifier":[{"identifier":"10.1007/s11096-006-9043-5","schema":"doi","url":"https://dx.doi.org/10.1007/s11096-006-9043-5"}],"typology":"publication","subType":"Article","title":["Patients’ attitudes towards and experiences of generic drug substitution in Norway"],"author":["Kjoenniksen, Inge","Lindbaek, Morten","Granas, Anne Gerd"],"date":["2006-11-17"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","target":"50|doi_________::fa0760d1427b71b6cb3ffcc739751197","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-09-29","dataInfo":null}]} -{"id":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","localIdentifier":[{"identifier":"10.1080/10408398509527417","schema":"doi","url":"http://www.tandfonline.com/doi/pdf/10.1080/10408398509527417"}],"typology":"publication","subType":"Article","title":["The genusallium. Part 2"],"author":["Gruffydd R. Fenwick","Anthony B. Hanley","John R. Whitaker"],"date":["2009-09-30T13:53:43Z","2009-09-29"],"subject":[{"scheme":"keywords","value":"General Medicine"}],"publisher":["Informa UK Limited"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","target":"50|pmid________::43fb246d61ba89b7f9825d9e02856d17","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-01-01","dataInfo":null}]} -{"id":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","localIdentifier":[{"identifier":"10.1111/j.1742-481x.2009.00634.x","schema":"doi","url":"https://dx.doi.org/10.1111/j.1742-481x.2009.00634.x"}],"typology":"publication","subType":"Article","title":["Venous leg ulcers: patient concordance with compression therapy and its impact on healing and prevention of recurrence"],"author":["Moffatt, Christine","Kommala, Dheerendra","Dourdin, Nathalie","Choe, Yoonhee"],"date":["2009-01-01","2009-11-13T10:40:02Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","target":"50|doi_________::1d47307b88d6bb6757f71bfc56686b74","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2015-01-01","dataInfo":null}]} -{"id":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","localIdentifier":[{"identifier":"10.1111/jocd.12148","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/jocd.12148/fullpdf"}],"typology":"publication","subType":"Article","title":["Assessment of efficacy and tolerability of different concentrations of trichloroacetic acid vs\n. carbon dioxide laser in treatment of xanthelasma palpebrarum"],"author":["Basma Mourad","Lamia H. Elgarhy","Heba-Alla Ellakkawy","Nageh Elmahdy"],"date":["2015-08-07","2015-01-01"],"subject":[{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1602017035423,"id":"50|doi_________::002d84a55111d3d23a3ef388f8f31ad5","originalId":["10.1007/s10956-019-9769-1","9769","50|doiboost____::002d84a55111d3d23a3ef388f8f31ad5"],"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-07-29T19:20:00+0200","dateoftransformation":"1970-01-19T12:20:43+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yeh, Heng-Yi","name":"Heng-Yi","surname":"Yeh","rank":1,"pid":null,"affiliation":[]},{"fullname":"Tsai, Yu-Hsiang","name":"Yu-Hsiang","surname":"Tsai","rank":2,"pid":null,"affiliation":[]},{"fullname":"Tsai, Chin-Chung","name":"Chin-Chung","surname":"Tsai","rank":3,"pid":null,"affiliation":[]},{"fullname":"Chang, Hsin-Yi","name":"Hsin-Yi","surname":"Chang","rank":4,"pid":[{"value":"https://orcid.org/0000-0002-9659-1022","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Education","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Investigating Students’ Conceptions of Technology-Assisted Science Learning: a Drawing Analysis","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-31","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-07-01","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31T13:04:54Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1007/s10956-019-9769-1.pdf","http://link.springer.com/article/10.1007/s10956-019-9769-1/fulltext.html","http://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1596054283851,"id":"50|doi_________::0035704f67a6e839f786b7390c31106e","originalId":["990","10.1186/1471-2377-14-81","50|doiboost____::0035704f67a6e839f786b7390c31106e"],"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-07-29T20:24:43Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Florien W Boele","name":"Florien W","surname":"Boele","rank":1,"pid":null,"affiliation":null},{"fullname":"Irma M Verdonck-de Leeuw","name":"Irma M","surname":"Verdonck-de Leeuw","rank":2,"pid":null,"affiliation":null},{"fullname":"Pim Cuijpers","name":"Pim","surname":"Cuijpers","rank":3,"pid":null,"affiliation":null},{"fullname":"Jaap C Reijneveld","name":"Jaap C","surname":"Reijneveld","rank":4,"pid":null,"affiliation":null},{"fullname":"Jan J Heimans","name":"Jan J","surname":"Heimans","rank":5,"pid":null,"affiliation":null},{"fullname":"Martin Klein","name":"Martin","surname":"Klein","rank":6,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Clinical Neurology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Medicine","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Internet-based guided self-help for glioma patients with depressive symptoms: design of a randomized controlled trial","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2014-04-10T19:01:28Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-23","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1186/1471-2377-14-81.pdf","http://link.springer.com/article/10.1186/1471-2377-14-81/fulltext.html","http://link.springer.com/content/pdf/10.1186/1471-2377-14-81","http://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"BMC Neurology","issnPrinted":null,"issnOnline":"1471-2377","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"14","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619631198141,"id":"50|doi_________::003bc91a6b4c1565813dfdd522697b1a","originalId":["10.1039/c8tc05911j","50|doiboost____::003bc91a6b4c1565813dfdd522697b1a"],"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-04-28T17:33:18Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yuqin Li","name":"Yuqin","surname":"Li","rank":1,"pid":null,"affiliation":null},{"fullname":"Siming Gao","name":"Siming","surname":"Gao","rank":2,"pid":null,"affiliation":null},{"fullname":"Nan Zhang","name":"Nan","surname":"Zhang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xin Huang","name":"Xin","surname":"Huang","rank":4,"pid":null,"affiliation":null},{"fullname":"Jinchang Tian","name":"Jinchang","surname":"Tian","rank":5,"pid":null,"affiliation":null},{"fullname":"Feng Xu","name":"Feng","surname":"Xu","rank":6,"pid":null,"affiliation":null},{"fullname":"Zhizhong Sun","name":"Zhizhong","surname":"Sun","rank":7,"pid":null,"affiliation":null},{"fullname":"Shougen Yin","name":"Shougen","surname":"Yin","rank":8,"pid":null,"affiliation":null},{"fullname":"Xiaoming Wu","name":"Xiaoming","surname":"Wu","rank":9,"pid":null,"affiliation":null},{"fullname":"Wenyi Chu","name":"Wenyi","surname":"Chu","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-4926-4475","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Materials Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Solution-processable, high luminance deep-blue organic light emitting devices based on novel naphthalene bridged bis-triphenylamine derivatives","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-29T03:05:29Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-02-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 .","dataInfo":null},{"value":"A series of naphthalene bridged bis-triphenylamine derivatives with a twisted structure was designed, synthesized and characterized. The dependence of their thermal, photophysical and electrochemical properties and performance as emitters in OLEDs on their chemical structure was systematically studied by the introduction of aryl groups with electron-donating or electron-withdrawing substituents to the naphthalene bridged bis-triphenylamine core. These compounds exhibited steady blue light emissions and high d values ranging from 468 to 500 °C. Most importantly, the deep-blue OLEDs were successfully fabricated using a solution processed method by blending PVK and PBD to improve OLED performance. The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 with CIE coordinates of (0.166, 0.097). These results indicated that these compounds with the twisted naphthalene bridged bis-triphenylamine core could show stable deep-blue electroluminescence properties and introducing the electron-donating group (–OCH 3 ) could enable high luminance and current efficiency for OLEDs.","dataInfo":null}],"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"publisher":{"value":"Royal Society of Chemistry (RSC)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://rsc.li/journals-terms-of-use","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://pubs.rsc.org/en/content/articlepdf/2019/TC/C8TC05911J","http://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Materials Chemistry C","issnPrinted":"2050-7526","issnOnline":"2050-7534","issnLinking":null,"ep":"2698","iss":null,"sp":"2686","vol":"7","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1611897051298,"id":"50|doi_________::0048eb8bb3f289cccc9358bf3726a772","originalId":["10.1016/s1441-3582(03)70132-3","10.1016/S1441-3582(03)70132-3","50|doiboost____::0048eb8bb3f289cccc9358bf3726a772"],"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-12T11:12:27+0100","dateoftransformation":"1970-01-19T09:00:07+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Ling, Bith-Hong","name":"Bith-Hong","surname":"Ling","rank":1,"pid":null,"affiliation":[]},{"fullname":"Lockshin, Larry","name":"Larry","surname":"Lockshin","rank":2,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Components of Wine Prices for Australian Wine: How Winery Reputation, Wine Quality, Region, Vintage, and Winery Size Contribute to the Price of Varietal Wines","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2003-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2018-12-09","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-07-07T08:52:15Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-01-27","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":" Australian wines are identified by the varietal names of the grapes rather than the regions as in France and other traditional wine producing countries. This paper uses the concept of hedonic price theory to investigate a range of extrinsic characteristics’ ability to predict prices for different climate regions (warm and cool) and four major wine varieties of Australian wines, two reds (shiraz and cabernet) and two whites (chardonnay and riesling). The effects of winery reputation (wine company/brand), winery size (production scale), age of the wine, and region of origin (wine grape source) contributing to the relationship between price and quality attributes of Australian wines are investigated, based on 1880 observations of bottled wines. Wine quality rating and winery/brand reputation have major effects on the price, while region and size of winery have differential effects depending on the variety of grape. Vintage has only a minor effect. ","dataInfo":null}],"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://journals.sagepub.com/page/policies/text-and-data-mining-license","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/plain","http://journals.sagepub.com/doi/pdf/10.1016/S1441-3582%2803%2970132-3","http://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-07-07T08:52:15Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1613672628083,"id":"50|doi_________::005ca383dbcbecb839c1c4f525636048","originalId":["10.1080/01431161.2017.1302106","50|doiboost____::005ca383dbcbecb839c1c4f525636048"],"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-02-18T18:23:48Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Jun Wang","name":"Jun","surname":"Wang","rank":1,"pid":null,"affiliation":null},{"fullname":"Jinye Peng","name":"Jinye","surname":"Peng","rank":2,"pid":null,"affiliation":null},{"fullname":"Xiaoyue Jiang","name":"Xiaoyue","surname":"Jiang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xiaoyi Feng","name":"Xiaoyi","surname":"Feng","rank":4,"pid":null,"affiliation":null},{"fullname":"Jianhong Zhou","name":"Jianhong","surname":"Zhou","rank":5,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Earth and Planetary Sciences","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Remote-sensing image fusion using sparse representation with sub-dictionaries","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-24T08:21:49Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-18","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"publisher":{"value":"Informa UK Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.tandfonline.com/doi/pdf/10.1080/01431161.2017.1302106","http://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"International Journal of Remote Sensing","issnPrinted":"0143-1161","issnOnline":"1366-5901","issnLinking":null,"ep":"3585","iss":null,"sp":"3564","vol":"38","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619364899333,"id":"50|doi_________::006a8e00482f03066c79b472dfe51ba3","originalId":["10.1515/bot-2019-0045","50|doiboost____::006a8e00482f03066c79b472dfe51ba3"],"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-02-08T18:30:38+0100","dateoftransformation":"1970-01-19T08:13:03+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"den Hartog, Cornelis","name":"Cornelis","surname":"den Hartog","rank":1,"pid":null,"affiliation":[]},{"fullname":"Triest, Ludwig","name":"Ludwig","surname":"Triest","rank":2,"pid":[{"value":"https://orcid.org/0000-0002-4946-9614","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Plant Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Aquatic Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ecology, Evolution, Behavior and Systematics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"A profound view and discourse on the typification and status of three confused taxa: Ruppia maritima, R. spiralis and R. cirrhosa","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2020-01-15","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15T09:02:47Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-06-25","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractTaxonomic difficulties have persisted within the genus Ruppia for a long time. We first unravel misconceptions as perceived on different continents and subsequently present a revised interpretation of the identity and typification of three European taxa at species level: Ruppia maritima L., Ruppia spiralis L. ex Dumortier, and Ruppia cirrhosa (Petagna) Grande. To do this, historical specimens, illustrations and original descriptions were studied. We supersede a previous choice of the figure of Buccaferrea maritima, foliis minus acutis Micheli (1729) as the lectotype of R. maritima and type species of the genus Ruppia owing to a serious conflict with the protologue. Based on a meticulous interpretation of protologues and figures in a historical context, we reject the recent view of assigning R. cirrhosa and its proposed lectotype (iconotype) as a homotypic synonym of R. maritima. We agree with an earlier lectotypification of R. spiralis, though for another reason than the above-mentioned abused homotypy. Consequently, R. cirrhosa is a synonym of neither R. maritima or R. spiralis, based on material from Petagna in the Herbarium of Naples designated as the holotype of R. cirrhosa. We argue for three species to be considered as fully independent taxa: R. maritima, R. spiralis and R. cirrhosa.","dataInfo":null}],"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.degruyter.com/view/journals/botm/63/3/article-p229.xml","https://www.degruyter.com/document/doi/10.1515/bot-2019-0045/pdf","http://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-06-25","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1588058636007,"id":"50|doi_________::00a55b7fae8de31a2ffa4e235e98a7bf","originalId":["10.1109/tns.2004.835620","50|doiboost____::00a55b7fae8de31a2ffa4e235e98a7bf"],"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-04-30T20:50:59+0200","dateoftransformation":"1970-01-19T10:11:12+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Veloso, J.F.C.A.","name":"J.F.C.A.","surname":"Veloso","rank":1,"pid":null,"affiliation":[]},{"fullname":"Amaro, F.","name":"F.","surname":"Amaro","rank":2,"pid":null,"affiliation":[]},{"fullname":"dos Santos, J.M.F.","name":"J.M.F.","surname":"dos Santos","rank":3,"pid":null,"affiliation":[]},{"fullname":"Mir, J.A.","name":"J.A.","surname":"Mir","rank":4,"pid":null,"affiliation":[]},{"fullname":"Derbyshire, G.E.","name":"G.E.","surname":"Derbyshire","rank":5,"pid":null,"affiliation":[]},{"fullname":"Stephenson, R.","name":"R.","surname":"Stephenson","rank":6,"pid":null,"affiliation":[]},{"fullname":"Rhodes, N.J.","name":"N.J.","surname":"Rhodes","rank":7,"pid":null,"affiliation":[]},{"fullname":"Schooneveld, E.M.","name":"E.M.","surname":"Schooneveld","rank":8,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Nuclear and High Energy Physics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Electrical and Electronic Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Nuclear Energy and Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Application of the microhole and strip plate detector for neutron detection","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2004-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-14","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2004-10-19T08:20:44Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"publisher":{"value":"Institute of Electrical and Electronics Engineers (IEEE)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/23/29603/01344292.pdf?arnumber=1344292","http://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-10-19T08:20:44Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1592299453363,"id":"50|doi_________::00ad40afc28e0c860116ac595183a0c0","originalId":["S0377221797004529","10.1016/s0377-2217(97)00452-9","50|doiboost____::00ad40afc28e0c860116ac595183a0c0"],"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-06-16T09:24:13Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Chung-Yee Lee","name":"Chung-Yee","surname":"Lee","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Two-machine flowshop scheduling with availability constraints","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2002-07-25T17:48:22Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1999-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-04-24","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://www.elsevier.com/tdm/userlicense/1.0/","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/plain","http://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"1999-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"European Journal of Operational Research","issnPrinted":"0377-2217","issnOnline":null,"issnLinking":null,"ep":"429","iss":null,"sp":"420","vol":"114","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1609910911587,"id":"50|doi_________::00d8413c6d6f5c091ab6dda88f0a0ecb","originalId":["10.1175/jpo-d-16-0281.1","50|doiboost____::00d8413c6d6f5c091ab6dda88f0a0ecb"],"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-01-06T05:28:31Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"P. B. Smit","name":"P. B.","surname":"Smit","rank":1,"pid":null,"affiliation":null},{"fullname":"T. T. Janssen","name":"T. T.","surname":"Janssen","rank":2,"pid":null,"affiliation":null},{"fullname":"T. H. C. Herbers","name":"T. H. C.","surname":"Herbers","rank":3,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Oceanography","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Nonlinear Wave Kinematics near the Ocean Surface","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-05-09T19:33:59Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-11-16","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractEstimation of second-order, near-surface wave kinematics is important for interpretation of ocean surface remote sensing and surface-following instruments, determining loading on offshore structures, and understanding of upper-ocean transport processes. Unfortunately, conventional wave theories based on Stokes-type expansions do not consider fluid motions at levels above the unperturbed fluid level. The usual practice of extrapolating the fluid kinematics from the unperturbed free surface to higher points in the fluid is generally reasonable for narrowband waves, but for broadband ocean waves this results in dramatic (and nonphysical) overestimation of surface velocities. Consequently, practical approximations for random waves are at best empirical and are often only loosely constrained by physical principles. In the present work, the authors formulate the governing equations for water waves in an incompressible and inviscid fluid, using a boundary-fitted coordinate system (i.e., sigma or s coordinates) to derive expressions for near-surface kinematics in nonlinear random waves from first principles. Comparison to a numerical model valid for highly nonlinear waves shows that the new results 1) are consistent with second-order Stokes theory, 2) are similar to extrapolation methods in narrowband waves, and 3) greatly improve estimates of surface kinematics in random seas.","dataInfo":null}],"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"publisher":{"value":"American Meteorological Society","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://journals.ametsoc.org/view/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://journals.ametsoc.org/jpo/article-pdf/47/7/1657/4810788/jpo-d-16-0281_1.pdf","https://journals.ametsoc.org/downloadpdf/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Physical Oceanography","issnPrinted":"0022-3670","issnOnline":"1520-0485","issnLinking":null,"ep":"1673","iss":null,"sp":"1657","vol":"47","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1582142606255,"id":"50|doi_________::00df77889b1ee0af015524c627f0cf47","originalId":["10.1109/isscc.2010.5433999","50|doiboost____::00df77889b1ee0af015524c627f0cf47"],"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-02-19T20:03:26Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Sameh A Ibrahim","name":"Sameh A","surname":"Ibrahim","rank":1,"pid":null,"affiliation":null},{"fullname":"Behzad Razavi","name":"Behzad","surname":"Razavi","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"A 20Gb/s 40mW equalizer in 90nm CMOS technology","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2010-03-24T14:35:14Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-19","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"publisher":{"value":"IEEE","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0004","classname":"0004","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/5428240/5433812/05433999.pdf?arnumber=5433999","http://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"2010 IEEE International Solid-State Circuits Conference - (ISSCC)","issnPrinted":null,"issnOnline":null,"issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file From 8f7623e77afe2edce3681a81a0283f007f733c9d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:20 +0200 Subject: [PATCH 067/162] Hosted By Map - refactoring and application of the new aggregator --- .../SparkApplyHostedByMapToDatasource.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 4ecea63f32..fad313f1cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -45,7 +45,7 @@ object SparkApplyHostedByMapToDatasource { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -55,17 +55,15 @@ object SparkApplyHostedByMapToDatasource { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - - + val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } From 8ba8c77f92a82104076649fb3ba4fa80d13fd449 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:57 +0200 Subject: [PATCH 068/162] Hosted By Map - refactoring --- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 37afc319a2..312513285c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -29,7 +29,6 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) inst.getHostedby.setValue(ei.getName) if (ei.getOpenaccess) { @@ -60,7 +59,7 @@ object SparkApplyHostedByMapToResult { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -70,15 +69,15 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) From 1e952cccf644a27180684b62851171ccdfd313d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:15:43 +0200 Subject: [PATCH 069/162] Hosted By Map - refactoring and deletion of not needed methods --- .../SparkPrepareHostedByInfoToApply.scala | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 7395b24506..d342d57b07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils @@ -17,8 +17,6 @@ import org.slf4j.{Logger, LoggerFactory} object SparkPrepareHostedByInfoToApply { - - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { @@ -58,33 +56,6 @@ object SparkPrepareHostedByInfoToApply { toEntityItem(c.keys.head, c.values.head) } - def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { - var lst : List[EntityInfo] = List() - if (input.getEissn != null && !input.getEissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getLissn != null && !input.getLissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getIssn != null && !input.getIssn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst - } - - lst - } - - def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { - left.joinWith(right, - left.col("id").equalTo(right.col("id")), "left") - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - val hbi : HostedByItemType = t2._2 - dsi.setOpenAccess(hbi.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - } def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { @@ -123,7 +94,7 @@ object SparkPrepareHostedByInfoToApply { val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") + val outputPath = parser.get("preparedInfoPath") val hostedByMapPath = parser.get("hostedByMapPath") @@ -139,7 +110,7 @@ object SparkPrepareHostedByInfoToApply { val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) //STEP2: creare la mappa publication id issn, eissn, lissn esplosa - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource From eccf3851b0352b557a4cce3dbd90d0fcf8ee3408 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:16:30 +0200 Subject: [PATCH 070/162] Hosted By Map - refactoring --- .../dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index ca5e82a4a1..de41ebf28f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -23,10 +23,6 @@ object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - - - - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 @@ -217,7 +213,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) } From 67ba4c40e05bc90da931f7a621e11038849453db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:17:28 +0200 Subject: [PATCH 071/162] Hosted By Map - added parameter resources --- .../hostedbymap/hostedby_apply_params.json | 36 ++++++++++++++++++ .../hostedbymap/hostedby_prepare_params.json | 37 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json new file mode 100644 index 0000000000..e34398290a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json @@ -0,0 +1,36 @@ + +[ + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json new file mode 100644 index 0000000000..9809d71b56 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"hbmp", + "paramLongName":"hostedByMapPath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] From e94ae0b1de11f31dad6367605538371d9fe13d6b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:18:11 +0200 Subject: [PATCH 072/162] Hosted By Map - extention of the workflow to consider also the application of the map to publications and datasources --- .../graph/hostedbymap/oozie_app/workflow.xml | 106 ++++++++++++++++-- 1 file changed, 99 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index ecf6c3b316..30e500da3c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -9,6 +9,10 @@ outputPath the output path + + hostedByMapPath + the output path + sparkDriverMemory memory for driver process @@ -65,23 +69,30 @@ - + + + + + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - + + - @@ -134,13 +145,94 @@ --datasourcePath${sourcePath}/datasource --workingPath${workingDir} - --outputPath${outputPath} + --outputPath${hostedByMapPath} --masteryarn-cluster - + + + + yarn-cluster + Prepare info to apply the hbm + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hostedByMapPath${hostedByMapPath} + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + + + yarn-cluster + Apply hbm to result + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/publication + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Apply hbm to datasource + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/datasource + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + From eb8c3f85944c8b915d455270d9a3ec2e26047244 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:19:17 +0200 Subject: [PATCH 073/162] Hosted By Map - test modified because of the application of the new aggregator on datasources --- .../java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index b78eb978bf..c48c3b35d3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -98,7 +98,7 @@ class TestApply extends java.io.Serializable{ val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) assertEquals(10, dats_ds.count()) @@ -122,7 +122,7 @@ class TestApply extends java.io.Serializable{ }else{ assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) - assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) } }) From c7b71647c63cab956cb183775752df7c2b9d9b02 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:20:02 +0200 Subject: [PATCH 074/162] Hosted By Map - modification of the resource for testing the presence of only one entry per datasource id --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 6e1cce3b6e..64e2433edb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,2 +1,3 @@ {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} From 83c04e5d28472fe5a199f88b50b2b81e3b15c2e9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 4 Aug 2021 10:37:57 +0200 Subject: [PATCH 075/162] mapping test for dataset records adapted to reflect the delegated pid authority (zenodo) --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 60970f4d6c..c431b4dd82 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -250,7 +251,24 @@ public class MappersTest { final Relation r1 = (Relation) list.get(1); final Relation r2 = (Relation) list.get(2); + assertEquals(d.getId(), r1.getSource()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget()); + assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType()); + assertEquals(ModelConstants.OUTCOME, r1.getSubRelType()); + assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass()); + assertTrue(r1.getValidated()); + assertEquals("2020-01-01", r1.getValidationDate()); + + assertEquals(d.getId(), r2.getTarget()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource()); + assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType()); + assertEquals(ModelConstants.OUTCOME, r2.getSubRelType()); + assertEquals(ModelConstants.PRODUCES, r2.getRelClass()); + assertTrue(r2.getValidated()); + assertEquals("2020-01-01", r2.getValidationDate()); + assertValidId(d.getId()); + assertEquals("50|doi_________::000374d100a9db469bd42b69dbb40b36", d.getId()); assertEquals(2, d.getOriginalId().size()); assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -304,10 +322,12 @@ public class MappersTest { }); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); assertNotNull(d.getInstance().get(0).getPid()); - assertTrue(d.getInstance().get(0).getPid().isEmpty()); + assertFalse(d.getInstance().get(0).getPid().isEmpty()); - assertEquals("doi", d.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); - assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); + assertEquals("doi", d.getInstance().get(0).getPid().get(0).getQualifier().getClassid()); + assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getPid().get(0).getValue()); + + assertTrue(d.getInstance().get(0).getAlternateIdentifier().isEmpty()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); @@ -738,12 +758,11 @@ public class MappersTest { } private void assertValidId(final String id) { - System.out.println(id); + // System.out.println(id); assertEquals(49, id.length()); - assertEquals('|', id.charAt(2)); - assertEquals(':', id.charAt(15)); - assertEquals(':', id.charAt(16)); + assertEquals(IdentifierFactory.ID_PREFIX_SEPARATOR, id.substring(2, 3)); + assertEquals(IdentifierFactory.ID_SEPARATOR, id.substring(15, 17)); } private List vocs() throws IOException { From 1965e4eece11830cbf373b5345030c1198f72fb8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 18:29:03 +0200 Subject: [PATCH 076/162] new workflow for downloading the dump of crossref and unpack it --- .../oozie_app/config-default.xml | 42 ++++++ .../downloadandunpack/oozie_app/download.sh | 2 + .../downloadandunpack/oozie_app/mock.sh | 2 + .../downloadandunpack/oozie_app/workflow.xml | 121 ++++++++++++++++++ .../doiboost/preprocess/oozie_app/download.sh | 2 +- 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml new file mode 100644 index 0000000000..508202e301 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh new file mode 100644 index 0000000000..1bb7aff1f0 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh new file mode 100644 index 0000000000..30386d6134 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml new file mode 100644 index 0000000000..91de3bfb37 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml @@ -0,0 +1,121 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index dfb0db7082..30386d6134 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ -#!bin/bash +#!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 5faeefbda8160685a16d7628d68fa534f03146c7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:54:03 +0200 Subject: [PATCH 077/162] added script to download the dump,changed the workflow input paramenters --- .../crossref/ExtractCrossrefRecords.java | 8 +- .../oozie_app/download.sh | 2 + .../oozie_app/workflow.xml | 119 +++++++++--------- 3 files changed, 68 insertions(+), 61 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7cae1fcbd..c7d3770a04 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -29,16 +29,16 @@ public class ExtractCrossrefRecords { "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); - final String workingPath = parser.get("workingPath"); + final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(hdfsServerUri.concat(crossrefFileNameTarGz)); + Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); + conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); + FileSystem fs = FileSystem.get(URI.create(workingPath), conf); FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); try (TarArchiveInputStream tais = new TarArchiveInputStream( new GzipCompressorInputStream(crossrefFileStream))) { diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh new file mode 100644 index 0000000000..662f5313c1 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 506d86a081..2d3ad6bc3f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -1,13 +1,5 @@ - + - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - sparkDriverMemory memory for driver process @@ -18,27 +10,82 @@ sparkExecutorCores - 2 number of cores used by single executor + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + - + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ + --outputPath${crossrefDumpPath}/files/ @@ -48,7 +95,7 @@ yarn-cluster cluster - SparkGenerateCrossrefDataset + SparkUnpackCrossrefEntries eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries dhp-doiboost-${projectVersion}.jar @@ -63,56 +110,14 @@ --masteryarn-cluster --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates + --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - \ No newline at end of file From bd096f51706b60f740430c007a9d3c67ec66d519 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:55:43 +0200 Subject: [PATCH 078/162] removed not needed param file --- .../generate_dataset_params.json | 21 ------------------- 1 file changed, 21 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e0803372..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file From b7079804cb4e55b1d027c5e1857bfc8f000055d7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:34:35 +0200 Subject: [PATCH 079/162] CrossrefDump - put token among the parameters --- .../doiboost/crossref_dump_reader/oozie_app/workflow.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 2d3ad6bc3f..6e4f179128 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -22,6 +22,10 @@ crossrefDumpPath the Crossref dump path + + crossrefdumptoken + the token for the API dump path + @@ -37,7 +41,7 @@ - + @@ -69,6 +73,7 @@ ${url} ${crossrefDumpPath} ${crossrefdumpfilename} + ${crossrefdumptoken} HADOOP_USER_NAME=${wf:user()} download.sh From 54a6cbb24497ae4aa52b93a33f961ce6d571bcfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:41:10 +0200 Subject: [PATCH 080/162] CrossrefDump - put token among the parameters --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 662f5313c1..2ce7042835 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put $2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file From 964f97ed4d16db8a95555a7051f1a2d18181c418 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:06 +0200 Subject: [PATCH 081/162] cleanup --- dhp-workflows/pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index cfdf365732..22ee776197 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -24,8 +24,6 @@ dhp-dedup-openaire dhp-enrichment dhp-graph-provision - - dhp-blacklist dhp-stats-update dhp-stats-promote From da20fceaf7aefbee08844e9257b47ba97e5e2961 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:53:45 +0200 Subject: [PATCH 082/162] removed all the part related to the crossref dump download since it is done in a separate workflow --- .../generate_dataset_params.json | 21 --- .../oozie_app/config-default.xml | 42 ------ .../oozie_app/workflow.xml | 118 ----------------- .../oozie_app/config-default.xml | 42 ------ .../downloadandunpack/oozie_app/download.sh | 2 - .../downloadandunpack/oozie_app/mock.sh | 2 - .../downloadandunpack/oozie_app/workflow.xml | 121 ------------------ .../doiboost/preprocess/oozie_app/download.sh | 2 - .../preprocess/oozie_app/workflow.xml | 76 +---------- 9 files changed, 2 insertions(+), 424 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e0803372..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml deleted file mode 100644 index 508202e301..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml deleted file mode 100644 index 506d86a081..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - 2 - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml deleted file mode 100644 index 508202e301..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh deleted file mode 100644 index 1bb7aff1f0..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh deleted file mode 100644 index 30386d6134..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml deleted file mode 100644 index 91de3bfb37..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml +++ /dev/null @@ -1,121 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - crossrefdumpfilename - the Crossref input path - - - crossrefDumpPath - the Crossref dump path - - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - HADOOP_USER_NAME=${wf:user()} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh deleted file mode 100644 index 30386d6134..0000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ecaeda7091..3700ce5d97 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,14 +63,10 @@ - ${wf:conf('resumeFrom') eq 'Skip'} - ${wf:conf('resumeFrom') eq 'ImportCrossRef'} - ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -79,67 +75,6 @@ - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - yarn-cluster @@ -166,14 +101,7 @@ - - - - - - - - + From 577f3b1ac8696abf405de9969f4efbd81d8bba95 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:58 +0200 Subject: [PATCH 083/162] added dnet workflows responsible for the graph construction, enrichment, provision --- .../actionset_bipFinderScores.xml | 100 ++ .../dhp/actionmanager/actionset_datacite.xml | 144 +++ .../dhp/actionmanager/actionset_doiboost.xml | 200 ++++ .../actionset_h2020_classification.xml | 132 +++ .../actionset_orcidworks-no-doi.xml | 101 ++ .../dhp/actionmanager/actionset_ror.xml | 89 ++ .../dhp/provision/00_beta_graph_for_IIS.xml | 628 +++++++++++ .../dhp/provision/00_prod_graph_for_IIS.xml | 437 ++++++++ .../eu/dnetlib/dhp/provision/01_IIS.xml | 225 ++++ .../dnetlib/dhp/provision/02_beta_graph.xml | 995 ++++++++++++++++++ .../dnetlib/dhp/provision/02_prod_graph.xml | 778 ++++++++++++++ .../dnetlib/dhp/provision/03_graph2hive.xml | 74 ++ .../dnetlib/dhp/provision/04_graph2solr.xml | 99 ++ .../dnetlib/dhp/provision/05_graph2stats.xml | 100 ++ .../dhp/provision/06_publish_stats.xml | 87 ++ .../eu/dnetlib/dhp/provision/07_broker.xml | 131 +++ 16 files changed, 4320 insertions(+) create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml new file mode 100644 index 0000000000..e4680a0cf7 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Import bipFinder scores + Import bipFinder scores + 30 + + + declares the path holding the BIP SCORE data + + bipScorePath + /data/bip/20201206 + + + + + + + declares the path holding the LATEST GRAPH dump + + latestGraphPath + /tmp/stable_ids/graph/14_graph_blacklisted + + + + + + + prepare action sets + + + [ + { + 'set' : 'bipfinder-scores', + 'jobProperty' : 'export_action_set_bipfinder-scores', + 'enablingProperty' : 'active_bipfinder-scores', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare AS for the bipFinder scores integration + + executeOozieJob + IIS + + { + 'bipScorePath':'bipScorePath', + 'inputPath':'latestGraphPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder' + } + + build-report + + + + + + + update action sets + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml new file mode 100644 index 0000000000..d2ea9d35f7 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml @@ -0,0 +1,144 @@ + +
+ + + + + +
+ + Import Datacite ActionSet + Import InfoSpace + 30 + + + set the resume from + + resumeFrom + TransformDatacite + + + + + + + shall the datacite mapping produce the links? + + exportLinks + false + + + + + + + set the path storing the OAF Datacite records + + oafTargetPath + /data/datacite/production/datacite_oaf + + + + + + + set the input path for Datacite content + + datacitePath + /data/datacite + + + + + + + prepare action sets + + + [ + { + 'set' : 'datacite', + 'jobProperty' : 'export_action_set_datacite', + 'enablingProperty' : 'active_datacite', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'mainPath' : 'datacitePath', + 'oafTargetPath' : 'oafTargetPath', + 'exportLinks' : 'exportLinks', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'sourcePath' : 'oafTargetPath', + 'outputPath' : 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210723_163342_752 + 2021-07-23T16:44:05+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml new file mode 100644 index 0000000000..ce9eb8f4c4 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml @@ -0,0 +1,200 @@ + +
+ + + + + +
+ + Import DOIboost + Import InfoSpace + 30 + + + set the input path for MAG + + MAGDumpPath + /data/doiboost/mag-2021-02-15 + + + + + + + set the input path for CROSSREF dump + + crossrefDumpPath + /data/doiboost/crossref/ + + + + + + + set the intermediate path used to process MAG + + intermediatePathMAG + /data/doiboost/input/mag + + + + + + + set the input path for Crossref + + inputPathCrossref + /data/doiboost/input/crossref + + + + + + + set the timestamp for the Crossref incremental harvesting + + crossrefTimestamp + 1607614921429 + + + + + + + set the input path for UnpayWall + + inputPathUnpayWall + /data/doiboost/input/unpayWall + + + + + + + set the input path for ORCID + + inputPathOrcid + /data/orcid_activities_2020/last_orcid_dataset + + + + + + + set the working path for ORCID + + workingPathOrcid + /data/doiboost/input/orcid + + + + + + + set the hostedBy map path + + hostedByMapPath + /data/doiboost/input/hostedBy/hbMap.gz + + + + + + + set the oozie workflow name from which the execution will be resumed + + resumeFrom + ConvertCrossrefToOAF + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'doiboost', + 'jobProperty' : 'export_action_set_doiboost', + 'enablingProperty' : 'active_doiboost', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of DOIBoost + + executeOozieJob + IIS + + { + 'crossrefTimestamp' : 'crossrefTimestamp', + 'hostedByMapPath' : 'hostedByMapPath', + 'MAGDumpPath' :'MAGDumpPath', + 'inputPathMAG' : 'intermediatePathMAG', + 'inputPathCrossref' : 'inputPathCrossref', + 'crossrefDumpPath':'crossrefDumpPath', + 'inputPathUnpayWall' : 'inputPathUnpayWall', + 'inputPathOrcid' : 'inputPathOrcid', + 'outputPath' : 'outputPath', + 'workingPathOrcid':'workingPathOrcid', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app', + 'workingPath' : '/data/doiboost/process_p', + 'sparkExecutorCores' : '2', + 'sparkExecutorIntersectionMemory' : '12G', + 'sparkExecutorMemory' : '8G', + 'esServer' : '[es_server]', + 'esIndex' : 'crossref' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210714_075237_381 + 2021-07-14T09:51:46+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml new file mode 100644 index 0000000000..6d29e25a12 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml @@ -0,0 +1,132 @@ + +
+ + + + + +
+ + Import H2020classification + Import H2020classification + 30 + + + sets the URL to download the project file + + projectFileURL + https://cordis.europa.eu/data/cordis-h2020projects.csv + + + + + + + sets the URL to download the programme file + + programmeFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv + + + + + + + sets the URL to download the topics file + + topicFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx + + + + + + + sets the name of the sheet in the topic file to be read + + sheetName + Topics + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'h2020classification', + 'jobProperty' : 'export_action_set_h2020classification', + 'enablingProperty' : 'active_h2020classification', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the H2020 Classification + + executeOozieJob + IIS + + { + 'outputPath': 'outputPath', + 'sheetName':'sheetName', + 'projectFileURL' : 'projectFileURL', + 'programmeFileURL' : 'programmeFileURL', + 'topicFileURL':'topicFileURL' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification', + 'postgresURL':'', + 'postgresUser':'', + 'postgresPassword':'' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210524_084803_740 + 2021-05-24T09:05:50+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml new file mode 100644 index 0000000000..c5642dadcc --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml @@ -0,0 +1,101 @@ + +
+ + + + + +
+ + Import Orcid + Import InfoSpace + 30 + + + set the hdfs input path + + inputPath + /data/orcid_activities_2020 + + + + + + + set the temporary path where to store the action set + + processOutputPath + /tmp/prod_provision/working_path_orcid_activities + + + + + + + prepare action sets + + + [ + { + 'set' : 'orcidworks-no-doi', + 'jobProperty' : 'export_action_set_orcidworks_no_doi', + 'enablingProperty' : 'active_orcidworks_no_doi', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the Orcid No Doi + + executeOozieJob + IIS + + { + 'workingPath' : 'inputPath', + 'processOutputPath' : 'processOutputPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app', + 'spark2GenNoDoiDatasetMaxExecutors' : '200', + 'spark2GenNoDoiDatasetExecutorMemory' : '2G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210713_170819_470 + 2021-07-13T17:28:26+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml new file mode 100644 index 0000000000..4810fda3bb --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml @@ -0,0 +1,89 @@ + +
+ + + + + +
+ + Update ROR actionset + Import Infospace + 30 + + + Set the base path containing the no_doi_dataset folder + + inputPath + /data/ror/ror-data-2021-04-06.json + + + + + + + prepare action sets + + + [ + { + 'set' : 'ror', + 'jobProperty' : 'export_action_set_ror', + 'enablingProperty' : 'active_ror', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + update the ROR actionset + + executeOozieJob + IIS + + { + 'rorJsonInputPath' : 'inputPath', + 'rorActionSetPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app', + 'workingDir': '/tmp/import_ror_actionset_prod' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210518_143542_478 + 2021-05-18T14:37:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml new file mode 100644 index 0000000000..ef2205e322 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -0,0 +1,628 @@ + +
+ + + + + +
+ + Graph construction for IIS [BETA] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_inference/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_inference/graph/02_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_inference/graph/03_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_inference/graph/04_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_inference/graph/05_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_inference/graph/06_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_inference/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + false + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_inference/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210730_094240_462 + 2021-07-30T15:04:19+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml new file mode 100644 index 0000000000..e5ce3d7109 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -0,0 +1,437 @@ + +
+ + + + + +
+ + Graph construction for IIS [PROD NEW] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_inference/graph/00_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_inference/graph/01_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/prod_inference/graph/02_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_inference/graph/03_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_inference/graph/04_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_inference/graph/05_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + should apply the relations id patching based on the provided idMapping? + + shouldPatchRelations + false + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210719_165159_86 + 2021-07-19T20:45:09+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml new file mode 100644 index 0000000000..126d5f58d8 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml @@ -0,0 +1,225 @@ + +
+ + + + + +
+ + IIS main workflow V3 [PROD] + IIS + 30 + + + start + + + + + + + Set a regex of funder shortnames to exclude from the project reference processing + + referenceextraction_project_fundingclass_blacklist_regex + ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$ + + + + + + + prepare action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-06-23 + IIS + /tmp/prod_inference/graph/05_graph_cleaned + import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + 'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + false + build-report + + + + + + + update action sets + + + + + + + + wf_20210719_221139_780 + 2021-07-21T01:23:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml new file mode 100644 index 0000000000..766783f8bb --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -0,0 +1,995 @@ + +
+ + + + + +
+ + Graph Construction [BETA] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_provision/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_provision/graph/02_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/beta_provision/graph/03_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_provision/graph/04_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_provision/graph/05_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_provision/graph/06_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_provision/graph/07_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_provision/graph/08_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_provision/graph/09_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_provision/graph/10_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_provision/graph/11_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_provision/graph/12_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_provision/graph/13_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_provision/graph/14_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/15_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + true + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_provision/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_provision/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup' + } + + build-report + + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210803_134357_367 + 2021-08-03T17:08:11+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml new file mode 100644 index 0000000000..be6155f2fb --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -0,0 +1,778 @@ + +
+ + + + + +
+ + Graph construction [PROD NEW] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_provision/graph/00_prod_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_provision/graph/01_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/prod_provision/graph/02_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_provision/graph/03_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/prod_provision/graph/04_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_provision/graph/05_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/prod_provision/graph/06_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/prod_provision/graph/07_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/prod_provision/graph/08_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/prod_provision/graph/09_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/prod_provision/graph/10_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/prod_provision/graph/11_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/prod_provision/graph/12_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_provision/graph/13_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/orcid', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/funding', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/prod_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210723_171026_279 + 2021-07-24T00:00:39+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml new file mode 100644 index 0000000000..836e69d6f3 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml @@ -0,0 +1,74 @@ + +
+ + + + + +
+ + Graph to HiveDB [PROD] + Data Provision + 30 + + + Set the path containing the AGGREGATOR graph + + inputPath + + + + + + + + Set the target path to store the RAW graph + + hiveDbName + + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'inputPath' : 'inputPath', + 'hiveDbName' : 'hiveDbName' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app', + 'sparkDriverMemory' : '4G', + 'sparkExecutorMemory' : '10G', + 'sparkExecutorCores' : '3' + } + + build-report + + + + + + + + wf_20210728_075001_400 + 2021-07-28T08:04:00+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml new file mode 100644 index 0000000000..6cdf41bb63 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml @@ -0,0 +1,99 @@ + +
+ + + + + +
+ + Update Solr [PROD] + Data Provision + 30 + + + Set the path containing the GRAPH to index + + inputGraphRootPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the target path to store the RAW graph + + format + DMF + + + + + + + Set the lookup address + + isLookupUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'inputGraphRootPath' : 'inputGraphRootPath', + 'isLookupUrl' : 'isLookupUrl', + 'format' : 'format' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app', + 'sourceMaxRelations' : '1000', + 'targetMaxRelations' : '10000000', + 'relPartitions' : '3000', + 'batchSize' : '2000', + 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy', + 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', + 'resumeFrom' : 'prepare_relations', + 'shouldIndex' : 'true', + 'outputFormat' : 'SOLR', + 'sparkDriverMemoryForJoining' : '3G', + 'sparkExecutorMemoryForJoining' : '7G', + 'sparkExecutorCoresForJoining' : '4', + 'sparkDriverMemoryForIndexing' : '2G', + 'sparkExecutorMemoryForIndexing' : '2G', + 'sparkExecutorCoresForIndexing' : '64', + 'sparkNetworkTimeout' : '600', + 'workingDir' : '/tmp/prod_provision/working_dir/update_solr' + } + + build-report + + + + + + + + wf_20210724_062705_620 + 2021-07-25T13:25:37+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml new file mode 100644 index 0000000000..4dfae3c7d6 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Update Stats [PROD] + Data Provision + 30 + + + Set the OpenAIRE graph DB name + + openaire_db_name + openaire_prod_yyyyMMdd + + + + + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + update the content in the stats DB + + executeOozieJob + IIS + + { + 'openaire_db_name' : 'openaire_db_name', + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app', + 'hive_timeout' : '15000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_shadow_name' : 'openaire_prod_stats_shadow', + 'external_stats_db_name' : 'stats_ext', + 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow', + 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow', + 'context_api_url' : 'https://services.openaire.eu/openaire' + } + + build-report + + + + + + + + wf_20210725_065608_71 + 2021-07-26T07:35:55+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml new file mode 100644 index 0000000000..d8def071fd --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml @@ -0,0 +1,87 @@ + +
+ + + + + +
+ + Publish Stats [PROD] + Content Publishing + 35 + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + publishes the stats DB to the public schema + + executeOozieJob + IIS + + { + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app', + 'hive_timeout' : '150000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_production_name' : 'openaire_prod_stats', + 'monitor_db_production_name' : 'openaire_prod_stats_monitor', + 'observatory_db_production_name' : 'openaire_prod_stats_observatory' + } + + build-report + + + + + + + + wf_20210727_160728_625 + 2021-07-27T16:53:01+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml new file mode 100644 index 0000000000..cf337fd7e0 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml @@ -0,0 +1,131 @@ + +
+ + + + + +
+ + Update Broker events [PROD OCEAN] + Data Provision + 30 + + + Set the path containing the GRAPH to scan + + graphInputPath + + + + + + + + Set the datasource Ids Whitelist + + datasourceIdWhitelist + openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68 + + + + + + + Set the datasource type Whitelist + + datasourceTypeWhitelist + pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository + + + + + + + Set the datasource Id Blacklist + + datasourceIdBlacklist + - + + + + + + + Set the TOPIC whitelist (* = all topics) + + topicWhitelist + ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID + + + + + + + Set the output path to store the Event records + + outputDir + /var/lib/dnet/broker_PROD/events + + + + + + + wait configurations + + + + + + + update the BROKER events + + executeOozieJob + IIS + + { + 'graphInputPath' : 'graphInputPath', + 'datasourceIdWhitelist' : 'datasourceIdWhitelist', + 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist', + 'datasourceIdBlacklist' : 'datasourceIdBlacklist', + 'topicWhitelist' : 'topicWhitelist', + 'outputDir' : 'outputDir' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app', + 'esEventIndexName' : '', + 'esNotificationsIndexName' : '', + 'esIndexHost' : '', + 'maxIndexedEventsForDsAndTopic' : '100', + 'esBatchWriteRetryCount' : '8', + 'esBatchWriteRetryWait' : '60s', + 'esBatchSizeEntries' : '200', + 'esNodesWanOnly' : 'true', + 'brokerApiBaseUrl' : '', + 'brokerDbUrl' : '', + 'brokerDbUser' : '', + 'brokerDbPassword' : '', + 'sparkDriverMemory' : '3G', + 'sparkExecutorMemory' : '7G', + 'sparkExecutorCores' : '6', + 'workingDir' : '/tmp/prod_provision/working_dir/broker_events' + } + + build-report + + + + + + + + wf_20210709_073839_206 + 2021-07-09T11:01:01+00:00 + FAILURE + + + +
\ No newline at end of file From 2efa5abda5e4cbebcc4061c0f3866fe431d3163e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 12:28:36 +0200 Subject: [PATCH 084/162] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 30e500da3c..71b20b356d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -123,8 +123,6 @@
- - From 9731a6144a4ed9df6a513b81cc5c390b9328971b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 10 Aug 2021 17:49:45 +0200 Subject: [PATCH 085/162] hostedbymap - in case the journal is open access the access may be changed also for the best access right in the result --- .../dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 312513285c..4c3b98f3b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -34,6 +34,7 @@ object SparkApplyHostedByMapToResult { if (ei.getOpenaccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } } From b688567db5238276a7826b7f3e3ccd9962f44f53 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 10:12:10 +0200 Subject: [PATCH 086/162] hostedbymap - modified part of test to check the bestaccessright changed --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 8 ++++++-- .../eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index c48c3b35d3..a48e527020 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -55,13 +55,17 @@ class TestApply extends java.io.Serializable{ assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) + assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getBestaccessright.getClassname.equals("not available")) }else{ assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json index 602bedbdad..8f6842bf35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -7,7 +7,7 @@ {"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} {"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} {"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . < https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} -{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} {"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} {"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} {"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 2ee21da43b8a93b8db9f75528e8c4e268e1b6187 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:13:22 +0200 Subject: [PATCH 087/162] suggestions from SonarLint --- .../GenerateOoziePropertiesMojo.java | 12 +- .../WritePredefinedProjectProperties.java | 11 +- .../GenerateOoziePropertiesMojoTest.java | 16 +- .../WritePredefinedProjectPropertiesTest.java | 32 ++-- .../dhp/application/ApplicationUtils.java | 14 -- .../ArgumentApplicationParser.java | 8 +- .../dhp/application/OptionsParameter.java | 3 - .../dnetlib/dhp/collection/ApiDescriptor.java | 2 +- .../java/eu/dnetlib/dhp/common/Constants.java | 3 + .../dnetlib/dhp/common/GraphResultMapper.java | 17 +- .../eu/dnetlib/dhp/common/MakeTarArchive.java | 23 +-- .../eu/dnetlib/dhp/common/MdstoreClient.java | 9 +- .../eu/dnetlib/dhp/common/PacePerson.java | 1 - .../dhp/common/api/ZenodoAPIClient.java | 27 +-- .../dhp/common/api/zenodo/Creator.java | 6 +- .../dnetlib/dhp/common/api/zenodo/File.java | 14 -- .../dhp/common/rest/DNetRestClient.java | 37 ++-- .../dhp/common/vocabulary/Vocabulary.java | 3 +- .../common/vocabulary/VocabularyGroup.java | 13 +- .../java/eu/dnetlib/dhp/message/Message.java | 5 +- .../eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 17 +- .../dhp/parser/utility/VtdUtilityParser.java | 3 + .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/schema/oaf/utils/OafMapperUtils.java | 17 +- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 32 +--- .../dhp/utils/ISLookupClientFactory.java | 15 +- .../saxon/AbstractExtensionFunction.java | 2 +- .../dnetlib/dhp/utils/saxon/ExtractYear.java | 5 +- .../dhp/utils/saxon/NormalizeDate.java | 2 +- .../eu/dnetlib/dhp/utils/saxon/PickFirst.java | 5 +- .../utils/saxon/SaxonTransformerFactory.java | 3 + .../ArgumentApplicationParserTest.java | 4 +- .../dnetlib/dhp/common/HdfsSupportTest.java | 10 +- .../eu/dnetlib/dhp/common/PacePersonTest.java | 6 +- .../dhp/common/SparkSessionSupportTest.java | 6 +- .../dhp/common/api/ZenodoAPIClientTest.java | 10 +- .../dhp/oa/merge/AuthorMergerTest.java | 4 +- .../schema/oaf/utils/OafMapperUtilsTest.java | 36 ++-- .../relation/RelationMapperTest.java | 4 +- .../dnetlib/dhp/actionmanager/ISClient.java | 29 ++- .../actionmanager/promote/MergeAndGet.java | 1 + .../PromoteActionPayloadForGraphTableJob.java | 4 +- ...rtitionActionSetsByPayloadTypeJobTest.java | 2 +- .../promote/MergeAndGetTest.java | 32 ++-- ...moteActionPayloadForGraphTableJobTest.java | 4 +- .../PromoteActionPayloadFunctionsTest.java | 8 +- .../bipfinder/CollectAndSave.java | 12 +- .../bipfinder/SparkAtomicActionScoreJob.java | 6 +- .../project/PrepareProgramme.java | 19 +- .../project/PrepareProjects.java | 2 +- .../project/ReadProjectsFromDB.java | 21 +-- .../project/SparkAtomicActionJob.java | 21 +-- .../project/utils/CSVParser.java | 6 +- .../project/utils/EXCELParser.java | 82 ++++----- .../actionmanager/project/utils/ReadCSV.java | 21 ++- .../project/utils/ReadExcel.java | 30 ++-- .../ror/GenerateRorActionSetJob.java | 7 +- .../dhp/actionmanager/ror/model/Address.java | 4 +- .../dhp/actionmanager/ror/model/Country.java | 4 +- .../ror/model/ExternalIdType.java | 2 +- .../ror/model/ExternalIdTypeDeserializer.java | 3 +- .../ror/model/GeonamesAdmin.java | 2 +- .../actionmanager/ror/model/GeonamesCity.java | 2 +- .../dhp/actionmanager/ror/model/Label.java | 2 +- .../dhp/actionmanager/ror/model/License.java | 2 +- .../actionmanager/ror/model/NameAndCode.java | 4 +- .../actionmanager/ror/model/Relationship.java | 4 +- .../ror/model/RorOrganization.java | 4 +- .../aggregation/common/AggregatorReport.java | 6 +- .../dhp/aggregation/common/ReportingJob.java | 2 +- .../mdstore/MDStoreActionNode.java | 6 +- .../dhp/collection/CollectorWorker.java | 2 +- .../GenerateNativeStoreSparkJob.java | 1 + .../dhp/collection/HttpConnector2.java | 18 +- .../mongodb/MDStoreCollectorPlugin.java | 2 - .../mongodb/MongoDbDumpCollectorPlugin.java | 2 +- .../collection/plugin/oai/OaiIterator.java | 22 +-- .../collection/plugin/rest/RestIterator.java | 26 +-- .../transformation/TransformSparkJobNode.java | 10 +- .../transformation/TransformationFactory.java | 17 +- .../dhp/transformation/xslt/DateCleaner.java | 5 - .../transformation/xslt/PersonCleaner.java | 26 --- .../xslt/XSLTTransformationFunction.java | 10 +- .../transformation/xslt/utils/Capitalize.java | 2 - .../SparkAtomicActionScoreJobTest.java | 30 ++-- .../actionmanager/project/CSVParserTest.java | 4 +- .../project/EXCELParserTest.java | 2 +- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../project/PrepareProjectTest.java | 2 +- .../project/SparkUpdateProjectTest.java | 2 +- .../ror/GenerateRorActionSetJobTest.java | 10 +- .../GenerateNativeStoreSparkJobTest.java | 12 +- .../plugin/rest/RestCollectorPluginTest.java | 4 +- .../CollectorWorkerApplicationTests.java | 6 +- .../transformation/TransformationJobTest.java | 16 +- .../dhp/blacklist/ReadBlacklistFromDB.java | 23 +-- .../SparkRemoveBlacklistedRelationJob.java | 6 +- .../dnetlib/dhp/blacklist/BlackListTest.java | 6 +- .../dhp/broker/model/EventFactory.java | 19 +- .../dhp/broker/oa/CheckDuplictedIdsJob.java | 11 +- .../dhp/broker/oa/IndexEventSubsetJob.java | 6 +- .../dhp/broker/oa/IndexNotificationsJob.java | 29 ++- .../dnetlib/dhp/broker/oa/IndexOnESJob.java | 3 +- .../broker/oa/PartitionEventsByDsIdJob.java | 13 +- .../oa/PrepareRelatedDatasourcesJob.java | 2 +- .../dhp/broker/oa/matchers/UpdateMatcher.java | 2 +- .../AbstractEnrichMissingDataset.java | 4 +- .../relatedProjects/EnrichMissingProject.java | 2 +- .../relatedProjects/EnrichMoreProject.java | 4 +- .../AbstractEnrichMissingPublication.java | 4 +- .../EnrichMissingSoftware.java | 2 +- .../relatedSoftware/EnrichMoreSoftware.java | 2 +- .../simple/EnrichMissingAuthorOrcid.java | 2 +- .../oa/matchers/simple/EnrichMissingPid.java | 2 +- .../matchers/simple/EnrichMissingSubject.java | 4 +- .../matchers/simple/EnrichMoreOpenAccess.java | 2 +- .../oa/matchers/simple/EnrichMorePid.java | 4 +- .../oa/matchers/simple/EnrichMoreSubject.java | 4 +- .../dhp/broker/oa/util/BrokerConstants.java | 3 + .../dhp/broker/oa/util/ClusterUtils.java | 3 + .../dhp/broker/oa/util/ConversionUtils.java | 26 +-- .../util/DatasourceRelationsAccumulator.java | 8 +- .../dhp/broker/oa/util/EventFinder.java | 5 +- .../dhp/broker/oa/util/SubscriptionUtils.java | 3 + .../dhp/broker/oa/util/TrustUtils.java | 2 + .../dhp/broker/oa/util/UpdateInfo.java | 4 +- .../broker/oa/matchers/UpdateMatcherTest.java | 5 +- .../EnrichMissingPublicationDateTest.java | 3 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 26 +-- .../dhp/oa/dedup/AbstractSparkAction.java | 20 ++- .../eu/dnetlib/dhp/oa/dedup/DatePicker.java | 5 +- .../dhp/oa/dedup/DedupRecordFactory.java | 12 +- .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 10 +- .../oa/dedup/DispatchEntitiesSparkJob.java | 9 +- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 6 +- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 4 +- .../dnetlib/dhp/oa/dedup/SparkBlockStats.java | 4 +- .../oa/dedup/SparkCopyOpenorgsSimRels.java | 5 +- .../dedup/SparkCopyRelationsNoOpenorgs.java | 21 +-- .../dhp/oa/dedup/SparkCreateDedupRecord.java | 3 +- .../dhp/oa/dedup/SparkCreateMergeRels.java | 14 +- .../oa/dedup/SparkCreateOrgsDedupRecord.java | 3 - .../dhp/oa/dedup/SparkCreateSimRels.java | 9 +- .../dhp/oa/dedup/SparkPrepareNewOrgs.java | 13 +- .../dhp/oa/dedup/SparkPrepareOrgRels.java | 24 +-- .../dhp/oa/dedup/SparkPropagateRelation.java | 6 +- .../dhp/oa/dedup/SparkUpdateEntity.java | 125 +++++++------ .../oa/dedup/graph/ConnectedComponent.java | 14 -- .../dhp/oa/dedup/model/Identifier.java | 8 +- .../dnetlib/dhp/oa/dedup/DatePickerTest.java | 8 +- .../dhp/oa/dedup/EntityMergerTest.java | 14 +- .../dnetlib/dhp/oa/dedup/IdGeneratorTest.java | 6 +- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 15 +- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 20 +-- .../oa/dedup/SparkOpenorgsProvisionTest.java | 16 +- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 2 +- .../dhp/oa/dedup/jpath/JsonPathTest.java | 11 +- .../doiboost/crossref/CrossrefImporter.java | 9 +- .../dnetlib/doiboost/crossref/ESClient.java | 7 +- .../orcid/ActivitiesDecompressor.java | 25 +-- .../orcid/ExtractXMLActivitiesData.java | 7 +- .../orcid/ExtractXMLSummariesData.java | 10 +- .../orcid/OrcidAuthorsDOIsDataGen.java | 3 +- .../doiboost/orcid/OrcidDSManager.java | 3 +- .../orcid/SparkDownloadOrcidAuthors.java | 60 ++++--- .../orcid/SparkDownloadOrcidWorks.java | 46 ++--- .../orcid/SparkGenLastModifiedSeq.java | 24 ++- .../orcid/SparkGenerateDoiAuthorList.java | 20 +-- .../orcid/SparkUpdateOrcidAuthors.java | 35 ++-- .../orcid/SparkUpdateOrcidDatasets.java | 167 ++---------------- .../doiboost/orcid/SparkUpdateOrcidWorks.java | 27 ++- .../doiboost/orcid/SummariesDecompressor.java | 24 +-- .../doiboost/orcid/json/JsonHelper.java | 3 + .../dnetlib/doiboost/orcid/util/HDFSUtil.java | 37 ++-- .../doiboost/orcid/xml/XMLRecordParser.java | 54 +----- .../orcidnodoi/ActivitiesDumpReader.java | 18 +- .../orcidnodoi/GenOrcidAuthorWork.java | 6 +- .../SparkGenEnrichedOrcidWorks.java | 53 +++--- .../doiboost/orcidnodoi/json/JsonWriter.java | 3 + .../orcidnodoi/oaf/PublicationToOaf.java | 47 ++--- .../orcidnodoi/similarity/AuthorMatcher.java | 44 ++--- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 37 ++-- .../doiboost/orcid/OrcidClientTest.java | 25 +-- .../orcid/xml/XMLRecordParserTest.java | 42 +---- .../orcidnodoi/PublicationToOafTest.java | 23 +-- .../orcidnodoi/xml/OrcidNoDoiTest.java | 141 ++++++--------- .../eu/dnetlib/dhp/PropagationConstant.java | 11 +- .../dhp/bulktag/community/Community.java | 5 - .../community/CommunityConfiguration.java | 14 +- .../CommunityConfigurationFactory.java | 10 +- .../dhp/bulktag/community/Constraint.java | 3 - .../dhp/bulktag/community/Constraints.java | 14 +- .../dhp/bulktag/community/Provider.java | 8 - .../community/QueryInformationSystem.java | 3 +- .../dhp/bulktag/community/ResultTagger.java | 98 +++++----- .../community/SelectionConstraints.java | 3 - .../bulktag/community/TaggingConstants.java | 3 + .../bulktag/community/ZenodoCommunity.java | 1 - .../dhp/bulktag/criteria/Selection.java | 4 +- .../dhp/bulktag/criteria/VerbResolver.java | 8 +- .../bulktag/criteria/VerbResolverFactory.java | 3 + .../PrepareDatasourceCountryAssociation.java | 25 ++- .../PrepareResultCountrySet.java | 9 +- .../SparkCountryPropagationJob.java | 12 +- .../PrepareResultOrcidAssociationStep1.java | 3 - .../PrepareResultOrcidAssociationStep2.java | 2 +- .../SparkOrcidToResultFromSemRelJob.java | 36 ++-- .../PrepareProjectResultsAssociation.java | 7 +- .../SparkResultToProjectThroughSemRelJob.java | 44 ++--- .../PrepareResultCommunitySet.java | 11 +- ...kResultToCommunityFromOrganizationJob.java | 16 +- .../PrepareResultCommunitySetStep2.java | 3 +- ...parkResultToCommunityThroughSemRelJob.java | 10 +- .../PrepareResultInstRepoAssociation.java | 13 +- ...arkResultToOrganizationFromIstRepoJob.java | 58 +++--- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 16 +- .../CommunityConfigurationFactoryTest.java | 9 +- .../CountryPropagationJobTest.java | 27 ++- .../OrcidPropagationJobTest.java | 6 +- .../ProjectPropagationJobTest.java | 6 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToOrganizationJobTest.java | 6 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 2 +- .../dhp/oa/graph/clean/OafCleaner.java | 2 +- .../eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 13 +- .../oa/graph/dump/QueryInformationSystem.java | 9 +- .../dhp/oa/graph/dump/ResultMapper.java | 18 +- .../dhp/oa/graph/dump/SaveCommunityMap.java | 16 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 11 +- .../eu/dnetlib/dhp/oa/graph/dump/Utils.java | 5 +- .../graph/dump/community/CommunitySplit.java | 21 +-- .../community/SparkDumpCommunityProducts.java | 5 +- .../community/SparkPrepareResultProject.java | 27 ++- .../community/SparkUpdateProjectInfo.java | 6 +- .../dhp/oa/graph/dump/complete/Constants.java | 1 - .../dump/complete/CreateContextEntities.java | 21 ++- .../dump/complete/CreateContextRelation.java | 24 ++- .../dhp/oa/graph/dump/complete/Process.java | 4 +- .../dump/complete/QueryInformationSystem.java | 8 +- .../complete/SparkOrganizationRelation.java | 5 +- .../funderresults/SparkDumpFunderResults.java | 9 +- .../SparkResultLinkedToProject.java | 8 +- .../DatasourceCompatibilityComparator.java | 20 --- .../graph/merge/MergeGraphTableSparkJob.java | 14 +- .../raw/AbstractMdRecordToOafMapper.java | 24 +-- .../raw/GenerateEntitiesApplication.java | 10 +- .../oa/graph/raw/MergeClaimsApplication.java | 8 +- .../raw/MigrateDbEntitiesApplication.java | 25 +-- .../raw/MigrateHdfsMdstoresApplication.java | 14 +- .../raw/MigrateMongoMdstoresApplication.java | 4 +- .../dhp/oa/graph/raw/OafToOafMapper.java | 5 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 9 +- .../graph/raw/PatchRelationsApplication.java | 2 - .../common/AbstractMigrationApplication.java | 16 +- .../oa/graph/GraphHiveImporterJobTest.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 6 +- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- .../dhp/oa/graph/dump/MakeTarTest.java | 2 +- .../dump/PrepareResultProjectJobTest.java | 131 +++++++------- .../dump/QueryInformationSystemTest.java | 9 +- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../oa/graph/dump/UpdateProjectInfoTest.java | 2 +- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 6 +- .../graph/dump/complete/CreateEntityTest.java | 4 +- .../dump/complete/CreateRelationTest.java | 4 +- .../ExtractRelationFromEntityTest.java | 2 +- .../dump/complete/FunderParsingTest.java | 7 +- .../complete/QueryInformationSystemTest.java | 10 +- .../RelationFromOrganizationTest.java | 2 +- .../ResultLinkedToProjectTest.java | 4 +- .../dump/funderresult/SplitPerFunderTest.java | 2 +- .../merge/MergeGraphTableSparkJobTest.java | 4 +- .../raw/GenerateEntitiesApplicationTest.java | 4 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 108 ++++++----- .../raw/MigrateDbEntitiesApplicationTest.java | 22 ++- .../MigrateMongoMdstoresApplicationTest.java | 2 - .../raw/PatchRelationApplicationTest.java | 2 +- .../oa/graph/reflections/ReflectionTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 18 +- .../CreateRelatedEntitiesJob_phase2.java | 8 +- .../dhp/oa/provision/PrepareRelationsJob.java | 12 +- .../dhp/oa/provision/ProvisionConstants.java | 3 + .../oa/provision/SolrAdminApplication.java | 1 - .../dhp/oa/provision/XmlConverterJob.java | 7 +- .../dhp/oa/provision/XmlIndexingJob.java | 15 +- .../dhp/oa/provision/model/JoinedEntity.java | 1 - .../model/ProvisionModelSupport.java | 3 + .../dhp/oa/provision/model/RelatedEntity.java | 1 - .../utils/AuthorPidTypeComparator.java | 1 - .../dhp/oa/provision/utils/ContextMapper.java | 9 +- .../oa/provision/utils/GraphMappingUtils.java | 6 +- .../oa/provision/utils/ISLookupClient.java | 9 +- .../oa/provision/utils/LicenseComparator.java | 69 -------- .../utils/StreamingInputDocumentFactory.java | 79 +++++---- .../oa/provision/utils/TemplateFactory.java | 4 +- .../oa/provision/utils/XmlRecordFactory.java | 73 +++----- .../utils/XmlSerializationUtils.java | 29 ++- .../dhp/oa/provision/utils/ZkServers.java | 2 +- .../sx/provision/DropAndCreateESIndex.java | 1 + .../provision/SparkIndexCollectionOnES.java | 1 + .../provision/IndexRecordTransformerTest.java | 23 ++- .../oa/provision/PrepareRelationsJobTest.java | 19 +- .../provision/SolrAdminApplicationTest.java | 17 +- .../oa/provision/SortableRelationKeyTest.java | 1 - .../dhp/oa/provision/XmlIndexingJobTest.java | 4 +- .../oa/provision/XmlRecordFactoryTest.java | 20 +-- .../graph/usagerawdata/export/ConnectDB.java | 20 +-- .../usagestatsbuild/export/ConnectDB.java | 18 +- 309 files changed, 1874 insertions(+), 2497 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java delete mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java index 10a25fdc30..a642dab707 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -8,8 +8,6 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; /** * Generates oozie properties which were not provided from commandline. @@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { }; @Override - public void execute() throws MojoExecutionException, MojoFailureException { + public void execute() { if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { String generatedSandboxName = generateSandboxName( @@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { /** * Generates sandbox name from workflow source directory. * - * @param wfSourceDir + * @param wfSourceDir workflow source directory * @return generated sandbox name */ private String generateSandboxName(String wfSourceDir) { // utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); + List sandboxNameParts = new ArrayList<>(); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); ArrayUtils.reverse(tokens); if (tokens.length > 0) { for (String token : tokens) { for (String limiter : limiters) { if (limiter.equals(token)) { - return sandboxNameParts.size() > 0 + return !sandboxNameParts.isEmpty() ? StringUtils.join(sandboxNameParts.toArray()) : null; } } - if (sandboxNameParts.size() > 0) { + if (!sandboxNameParts.isEmpty()) { sandboxNameParts.add(0, File.separator); } sandboxNameParts.add(0, token); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index d195ca86e4..e3cdf5a22f 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected List getEscapeChars(String escapeChars) { List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); + List realTokens = new ArrayList<>(); for (String token : tokens) { String realToken = getRealToken(token); realTokens.add(realToken); @@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { * @return content */ protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); + List names = new ArrayList<>(properties.stringPropertyNames()); Collections.sort(names); StringBuilder sb = new StringBuilder(); if (!StringUtils.isBlank(comment)) { @@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { throws MojoExecutionException { try { String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8); } catch (IOException e) { throw new MojoExecutionException("Error creating properties file", e); } @@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected static final List getListFromCSV(String csv) { if (StringUtils.isBlank(csv)) { - return new ArrayList(); + return new ArrayList<>(); } - List list = new ArrayList(); + List list = new ArrayList<>(); String[] tokens = StringUtils.split(csv, ","); for (String token : tokens) { list.add(token.trim()); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index 4bfcd3b33e..2ff6bea300 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ -public class GenerateOoziePropertiesMojoTest { +class GenerateOoziePropertiesMojoTest { private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach - public void clearSystemProperties() { + void clearSystemProperties() { System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); } @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { + void testExecuteSandboxNameAlreadySet() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String sandboxName = "originalSandboxName"; @@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { + void testExecuteEmptyWorkflowSourceDir() throws Exception { // given String workflowSourceDir = ""; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { + void testExecuteNullSandboxNameGenerated() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecute() throws Exception { + void testExecute() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteWithoutRoot() throws Exception { + void testExecuteWithoutRoot() throws Exception { // given String workflowSourceDir = "wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 0b3ea9653b..84b962b4b8 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension; /** @author mhorst, claudio.atzori */ @ExtendWith(MockitoExtension.class) -public class WritePredefinedProjectPropertiesTest { +class WritePredefinedProjectPropertiesTest { @Mock private MavenProject mavenProject; @@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest { // ----------------------------------- TESTS --------------------------------------------- @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectProperties() throws Exception { + void testExecuteWithProjectProperties() throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test() - public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { + void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { + void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromBlankLocation() { + void testExecuteIncludingPropertyKeysFromBlankLocation() { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { + void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { // given mojo.setQuiet(true); mojo.setIncludePropertyKeysFromFiles(new String[] { @@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidFile() { + void testExecuteIncludingPropertyKeysFromInvalidFile() { // given mojo.setIncludePropertyKeysFromFiles(new String[] { "invalid location" @@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { // given mojo.setIncludeEnvironmentVariables(true); @@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey"; String value = "systemPropertyValue"; @@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) + void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey "; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java deleted file mode 100644 index c53b835611..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.application; - -import java.io.*; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.Maps; - -public class ApplicationUtils { - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java index 0429bc25d1..72c1f6a5ec 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java @@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable { final StringWriter stringWriter = new StringWriter(); IOUtils.copy(gis, stringWriter); return stringWriter.toString(); - } catch (Throwable e) { - log.error("Wrong value to decompress:" + abstractCompressed); - throw new RuntimeException(e); + } catch (IOException e) { + log.error("Wrong value to decompress: {}", abstractCompressed); + throw new IllegalArgumentException(e); } } - public static String compressArgument(final String value) throws Exception { + public static String compressArgument(final String value) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(value.getBytes()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java index 7004112e42..f34326d672 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java @@ -9,9 +9,6 @@ public class OptionsParameter { private boolean paramRequired; private boolean compressed; - public OptionsParameter() { - } - public String getParamName() { return paramName; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java index 12937a1979..fbbbffcbbd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java @@ -34,7 +34,7 @@ public class ApiDescriptor { return params; } - public void setParams(final HashMap params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad477..8fab94e92f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -12,6 +12,9 @@ public class Constants { public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + private Constants() { + } + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 44599eb83f..8ceee5c8a0 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable { if (oInst.isPresent()) { out .setInstance( - oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); } @@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e44177..d0909642ce 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + try (TarArchiveOutputStream ar = new TarArchiveOutputStream( + fileSystem.create(hdfsWritePath).getWrappedStream())) { - TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); - RemoteIterator fileStatusListIterator = fileSystem - .listFiles( - new Path(inputPath), true); + while (iterator.hasNext()) { + writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + } - while (fileStatusListIterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); } - - ar.close(); } public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index 0bc782ccb9..d06544ae16 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; +import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; @@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable { final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) - .map(r -> r.first()) + .map(FindIterable::first) .map(d -> d.getString("currentId")) .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); @@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable { if (!Iterables.contains(client.listDatabaseNames(), dbName)) { final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); log.warn(err); - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } return client.getDatabase(dbName); } @@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable { String.format("Missing collection '%s' in database '%s'", collName, db.getName())); log.warn(err); if (abortIfMissing) { - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } else { return null; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e02ca614b..91c6c1825d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -24,7 +24,6 @@ import com.google.common.hash.Hashing; */ public class PacePerson { - private static final String UTF8 = "UTF-8"; private List name = Lists.newArrayList(); private List surname = Lists.newArrayList(); private List fullname = Lists.newArrayList(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 1f267733d1..3f5c6ad4a5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -5,6 +5,9 @@ import java.io.*; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; @@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable { this.deposition_id = deposition_id; } - public ZenodoAPIClient(String urlString, String access_token) throws IOException { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(bucket + "/" + file_name) - .addHeader("Content-Type", "application/zip") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .build(); @@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(body) .build(); @@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id + "/actions/newversion") - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); @@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(url) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java index c03762693b..c14af55b6e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java @@ -32,13 +32,13 @@ public class Creator { public static Creator newInstance(String name, String affiliation, String orcid) { Creator c = new Creator(); - if (!(name == null)) { + if (name != null) { c.name = name; } - if (!(affiliation == null)) { + if (affiliation != null) { c.affiliation = affiliation; } - if (!(orcid == null)) { + if (orcid != null) { c.orcid = orcid; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java index c7428de7d4..509f444b9b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java @@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo; import java.io.Serializable; -import net.minidev.json.annotate.JsonIgnore; - public class File implements Serializable { private String checksum; private String filename; private long filesize; private String id; - @JsonIgnore - // private Links links; - public String getChecksum() { return checksum; } @@ -46,13 +41,4 @@ public class File implements Serializable { this.id = id; } -// @JsonIgnore -// public Links getLinks() { -// return links; -// } -// -// @JsonIgnore -// public void setLinks(Links links) { -// this.links = links; -// } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java index 98dabf56a4..af6926cc7d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.common.rest; +import java.io.IOException; import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; @@ -23,17 +23,20 @@ public class DNetRestClient { private static final ObjectMapper mapper = new ObjectMapper(); + private DNetRestClient() { + } + public static T doGET(final String url, Class clazz) throws Exception { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet, clazz); } - public static String doGET(final String url) throws Exception { + public static String doGET(final String url) throws IOException { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet); } - public static String doPOST(final String url, V objParam) throws Exception { + public static String doPOST(final String url, V objParam) throws IOException { final HttpPost httpPost = new HttpPost(url); if (objParam != null) { @@ -45,25 +48,25 @@ public class DNetRestClient { return doHTTPRequest(httpPost); } - public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + public static T doPOST(final String url, V objParam, Class clazz) throws IOException { return mapper.readValue(doPOST(url, objParam), clazz); } - private static String doHTTPRequest(final HttpUriRequest r) throws Exception { - CloseableHttpClient client = HttpClients.createDefault(); + private static String doHTTPRequest(final HttpUriRequest r) throws IOException { + try (CloseableHttpClient client = HttpClients.createDefault()) { - log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); - log - .info( - "request headers: {}", - Arrays - .asList(r.getAllHeaders()) - .stream() - .map(h -> h.getName() + ":" + h.getValue()) - .collect(Collectors.joining(","))); + log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); + log + .info( + "request headers: {}", + Arrays + .asList(r.getAllHeaders()) + .stream() + .map(h -> h.getName() + ":" + h.getValue()) + .collect(Collectors.joining(","))); - CloseableHttpResponse response = client.execute(r); - return IOUtils.toString(response.getEntity().getContent()); + return IOUtils.toString(client.execute(r).getEntity().getContent()); + } } private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index a9daede8f2..b3eb98d4fa 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -46,7 +46,7 @@ public class Vocabulary implements Serializable { } public VocabularyTerm getTerm(final String id) { - return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); + return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null); } protected void addTerm(final String id, final String name) { @@ -81,7 +81,6 @@ public class Vocabulary implements Serializable { .ofNullable(getTermBySynonym(syn)) .map(term -> getTermAsQualifier(term.getId())) .orElse(null); - // .orElse(OafMapperUtils.unknown(getId(), getName())); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index a89bb486fc..d5f57849c7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable { } vocs.addTerm(vocId, termId, termName); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable { .getTerms() .values() .stream() - .map(t -> t.getId()) + .map(VocabularyTerm::getId) .collect(Collectors.toCollection(HashSet::new)); } @@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable { return Optional .ofNullable(vocId) .map(String::toLowerCase) - .map(id -> vocs.containsKey(id)) + .map(vocs::containsKey) .orElse(false); } private void addSynonyms(final String vocId, final String termId, final String syn) { String id = Optional .ofNullable(vocId) - .map(s -> s.toLowerCase()) + .map(String::toLowerCase) .orElseThrow( - () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); + () -> new IllegalArgumentException( + String + .format( + "empty vocabulary id for [term:%s, synonym:%s]", termId, syn))); Optional .ofNullable(vocs.get(id)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java index f1107b4b8b..c7a0b5f507 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.message; import java.io.Serializable; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -10,8 +9,8 @@ public class Message implements Serializable { private static final long serialVersionUID = 401753881204524893L; - public static String CURRENT_PARAM = "current"; - public static String TOTAL_PARAM = "total"; + public static final String CURRENT_PARAM = "current"; + public static final String TOTAL_PARAM = "total"; private MessageType messageType; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 7a8e55a6ed..aea046203a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -19,6 +18,9 @@ public class AuthorMerger { private static final Double THRESHOLD = 0.95; + private AuthorMerger() { + } + public static List merge(List> authors) { authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); @@ -36,7 +38,8 @@ public class AuthorMerger { public static List mergeAuthor(final List a, final List b, Double threshold) { int pa = countAuthorsPids(a); int pb = countAuthorsPids(b); - List base, enrich; + List base; + List enrich; int sa = authorsSize(a); int sb = authorsSize(b); @@ -62,7 +65,7 @@ public class AuthorMerger { // (if an Author has more than 1 pid, it appears 2 times in the list) final Map basePidAuthorMap = base .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -74,7 +77,7 @@ public class AuthorMerger { // (list of pid that are missing in the other list) final List> pidToEnrich = enrich .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -117,9 +120,9 @@ public class AuthorMerger { } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() + : ""; + return (pid.getQualifier() != null ? classid : "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java index 9ac0a0bf71..fd4c0191ac 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java @@ -12,6 +12,9 @@ import com.ximpleware.VTDNav; /** Created by sandro on 9/29/16. */ public class VtdUtilityParser { + private VtdUtilityParser() { + } + public static List getTextValuesWithAttributes( final AutoPilot ap, final VTDNav vn, final String xpath, final List attributes) throws VtdException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 1d002ed7ec..d8b1cded8a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { r .getAuthor() .stream() - .filter(a -> Objects.nonNull(a)) + .filter(Objects::nonNull) .filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .collect(Collectors.toList())); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index c6a8fd5a71..720fe47fb2 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*; public class OafMapperUtils { + private OafMapperUtils() { + } + public static Oaf merge(final Oaf left, final Oaf right) { if (ModelSupport.isSubClass(left, OafEntity.class)) { return mergeEntities((OafEntity) left, (OafEntity) right); } else if (ModelSupport.isSubClass(left, Relation.class)) { ((Relation) left).mergeFrom((Relation) right); } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName()); } return left; } @@ -38,7 +41,7 @@ public class OafMapperUtils { } else if (ModelSupport.isSubClass(left, Project.class)) { left.mergeFrom(right); } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); } return left; } @@ -62,7 +65,7 @@ public class OafMapperUtils { public static List listKeyValues(final String... s) { if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)"); } final List list = new ArrayList<>(); @@ -88,7 +91,7 @@ public class OafMapperUtils { .stream(values) .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -97,7 +100,7 @@ public class OafMapperUtils { .stream() .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -342,10 +345,10 @@ public class OafMapperUtils { if (instanceList != null) { final Optional min = instanceList .stream() - .map(i -> i.getAccessright()) + .map(Instance::getAccessright) .min(new AccessRightComparator<>()); - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new); if (StringUtils.isBlank(rights.getClassid())) { rights.setClassid(UNKNOWN); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 8d760a2cdc..6a86f30df7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -34,6 +34,9 @@ public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); + private DHPUtils() { + } + public static Seq toSeq(List list) { return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); } @@ -44,7 +47,7 @@ public class DHPUtils { md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { - System.err.println("Error creating id"); + log.error("Error creating id from {}", s); return null; } } @@ -53,33 +56,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String compressString(final String input) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - Base64OutputStream b64os = new Base64OutputStream(out)) { - GZIPOutputStream gzip = new GZIPOutputStream(b64os); - gzip.write(input.getBytes(StandardCharsets.UTF_8)); - gzip.close(); - return out.toString(); - } catch (Throwable e) { - return null; - } - } - - public static String decompressString(final String input) { - byte[] byteArray = Base64.decodeBase64(input.getBytes()); - int len; - try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); - ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { - byte[] buffer = new byte[1024]; - while ((len = gis.read(buffer)) != -1) { - bos.write(buffer, 0, len); - } - return bos.toString(); - } catch (Exception e) { - return null; - } - } - public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index b326c41597..8ae0bb5c34 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -18,13 +18,16 @@ public class ISLookupClientFactory { private static final int requestTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10; + private ISLookupClientFactory() { + } + public static ISLookUpService getLookUpService(final String isLookupUrl) { return getServiceStub(ISLookUpService.class, isLookupUrl); } @SuppressWarnings("unchecked") private static T getServiceStub(final Class clazz, final String endpoint) { - log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); + log.info("creating {} stub from {}", clazz.getName(), endpoint); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setAddress(endpoint); @@ -38,12 +41,10 @@ public class ISLookupClientFactory { log .info( - String - .format( - "setting connectTimeout to %s, requestTimeout to %s for service %s", - connectTimeout, - requestTimeout, - clazz.getCanonicalName())); + "setting connectTimeout to {}, requestTimeout to {} for service {}", + connectTimeout, + requestTimeout, + clazz.getCanonicalName()); policy.setConnectionTimeout(connectTimeout); policy.setReceiveTimeout(requestTimeout); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java index 9b00b908c1..81f1b5142d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java @@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException; public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { - public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; + public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public abstract String getName(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java index c7e311b02a..1ea2b9f46a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java @@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(""); } final Item item = arguments[0].head(); @@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction { for (String format : dateFormats) { try { c.setTime(new SimpleDateFormat(format).parse(s)); - String year = String.valueOf(c.get(Calendar.YEAR)); - return year; + return String.valueOf(c.get(Calendar.YEAR)); } catch (ParseException e) { } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 1b5f3c40d8..3e5def9b52 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(BLANK); } String s = arguments[0].head().getStringValue(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java index 46ecafd0aa..b46a415d82 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.utils.saxon; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import org.apache.commons.lang3.StringUtils; import net.sf.saxon.expr.XPathContext; @@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction { final String s1 = getValue(arguments[0]); final String s2 = getValue(arguments[1]); - return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); + final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : ""; + return new StringValue(value); } private String getValue(final Sequence arg) throws XPathException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java index b85d866f11..61049d2e1f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java @@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl; public class SaxonTransformerFactory { + private SaxonTransformerFactory() { + } + /** * Creates the index record transformer from the given XSLT * diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java index e140208308..1788239f25 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java @@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -public class ArgumentApplicationParserTest { +class ArgumentApplicationParserTest { @Test - public void testParseParameter() throws Exception { + void testParseParameter() throws Exception { final String jsonConfiguration = IOUtils .toString( this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java index 870943816f..fa721d5e56 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java @@ -21,13 +21,13 @@ public class HdfsSupportTest { class Remove { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); } @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { + void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { // when HdfsSupport.remove(tempDir.toString(), new Configuration()); @@ -36,7 +36,7 @@ public class HdfsSupportTest { } @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { + void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { // given Path file = Files.createTempFile(tempDir, "p", "s"); @@ -52,13 +52,13 @@ public class HdfsSupportTest { class ListFiles { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); } @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { + void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java index 5ebd7213e8..cb9ae28869 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; -public class PacePersonTest { +class PacePersonTest { @Test - public void pacePersonTest1() { + void pacePersonTest1() { PacePerson p = new PacePerson("Artini, Michele", false); assertEquals("Artini", p.getSurnameString()); @@ -17,7 +17,7 @@ public class PacePersonTest { } @Test - public void pacePersonTest2() { + void pacePersonTest2() { PacePerson p = new PacePerson("Michele G. Artini", false); assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Michele G", p.getNameString()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java index 2f01c08631..8fa966c2f2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java @@ -18,7 +18,8 @@ public class SparkSessionSupportTest { class RunWithSparkSession { @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); @@ -37,7 +38,8 @@ public class SparkSessionSupportTest { } @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 9ae9c33c2e..2ccaed3e4a 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @Disabled -public class ZenodoAPIClientTest { +class ZenodoAPIClientTest { private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String ACCESS_TOKEN = ""; @@ -22,7 +22,7 @@ public class ZenodoAPIClientTest { private final String depositionId = "674915"; @Test - public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { + void testUploadOldDeposition() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); @@ -44,7 +44,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -67,7 +67,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionNewName() throws IOException, MissingConceptDoiException { + void testNewVersionNewName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -87,7 +87,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionOldName() throws IOException, MissingConceptDoiException { + void testNewVersionOldName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java index 9c4e622148..3a7a41a1b0 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class AuthorMergerTest { +class AuthorMergerTest { private String publicationsBasePath; @@ -43,7 +43,7 @@ public class AuthorMergerTest { } @Test - public void mergeTest() { // used in the dedup: threshold set to 0.95 + void mergeTest() { // used in the dedup: threshold set to 0.95 for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8d519a93f6..4068f0abb2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import me.xuender.unidecode.Unidecode; -public class OafMapperUtilsTest { +class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -42,7 +42,7 @@ public class OafMapperUtilsTest { } @Test - public void testDateValidation() { + void testDateValidation() { assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); @@ -147,44 +147,46 @@ public class OafMapperUtilsTest { } @Test - public void testDate() { - System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); + void testDate() { + final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); + assertNotNull(date); + System.out.println(date); } @Test - public void testMergePubs() throws IOException { + void testMergePubs() throws IOException { Publication p1 = read("publication_1.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class); Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class); - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); + assertEquals(1, p1.getCollectedfrom().size()); + assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey()); + assertEquals(1, d2.getCollectedfrom().size()); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p1, d2) .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + .getClassid()); - assertEquals(p2.getCollectedfrom().size(), 1); + assertEquals(1, p2.getCollectedfrom().size()); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); + assertEquals(1, d1.getCollectedfrom().size()); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.DATASET_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p2, d1) .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + .getClassid()); } protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } protected T read(String filename, Class clazz) throws IOException { diff --git a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java index d1d1ada71a..5743b0831e 100644 --- a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java @@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation; import org.junit.jupiter.api.Test; -public class RelationMapperTest { +class RelationMapperTest { @Test - public void testLoadRels() throws Exception { + void testLoadRels() throws Exception { RelationMapper relationMapper = RelationMapper.load(); relationMapper.keySet().forEach(System.out::println); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 5a80c0b535..088e618c77 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.*; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.actionmanager.rmi.ActionManagerException; -import eu.dnetlib.actionmanager.set.ActionManagerSet; -import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes; -import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; public class ISClient implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); + private static final Logger log = LoggerFactory.getLogger(ISClient.class); private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private final ISLookUpService isLookup; + private final transient ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -63,7 +60,7 @@ public class ISClient implements Serializable { .map( sets -> sets .stream() - .map(set -> parseSetInfo(set)) + .map(ISClient::parseSetInfo) .filter(t -> ids.contains(t.getLeft())) .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) @@ -73,15 +70,17 @@ public class ISClient implements Serializable { } } - private Triple parseSetInfo(String set) { + private static Triple parseSetInfo(String set) { try { - Document doc = new SAXReader().read(new StringReader(set)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Document doc = reader.read(new StringReader(set)); return Triple .of( doc.valueOf("//SET/@id"), doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@latest")); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { throw new IllegalStateException(e); } } @@ -99,7 +98,7 @@ public class ISClient implements Serializable { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; - log.debug("quering for service property: " + q); + log.debug("quering for service property: {}", q); try { final List value = isLookup.quickSearchProfile(q); return Iterables.getOnlyElement(value); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java index fbb0729571..eccfa445cc 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -62,6 +62,7 @@ public class MergeAndGet { x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); } + @SuppressWarnings("unchecked") private static G selectNewerAndGet(G x, A y) { if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7893fcf8bd..c5f252c979 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); + @SuppressWarnings("unchecked") Class actionPayloadClazz = (Class) Class.forName(actionPayloadClassName); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); @@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob { return spark .read() .parquet(path) - .map((MapFunction) value -> extractPayload(value), Encoders.STRING()) + .map((MapFunction) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING()) .map( (MapFunction) value -> decodePayload(actionPayloadClazz, value), Encoders.bean(actionPayloadClazz)); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index f51c697f40..62eec13d51 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest { private ISClient isClient; @Test - public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { + void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { // given Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path outputDir = workingDir.resolve("output"); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index b2248d77aa..4c88e9de32 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -20,7 +20,7 @@ public class MergeAndGetTest { class MergeFromAndGetStrategy { @Test - public void shouldThrowForOafAndOaf() { + void shouldThrowForOafAndOaf() { // given Oaf a = mock(Oaf.class); Oaf b = mock(Oaf.class); @@ -33,7 +33,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndRelation() { + void shouldThrowForOafAndRelation() { // given Oaf a = mock(Oaf.class); Relation b = mock(Relation.class); @@ -46,7 +46,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndOafEntity() { + void shouldThrowForOafAndOafEntity() { // given Oaf a = mock(Oaf.class); OafEntity b = mock(OafEntity.class); @@ -59,7 +59,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOaf() { + void shouldThrowForRelationAndOaf() { // given Relation a = mock(Relation.class); Oaf b = mock(Oaf.class); @@ -72,7 +72,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -85,7 +85,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForRelationAndRelation() { + void shouldBehaveProperlyForRelationAndRelation() { // given Relation a = mock(Relation.class); Relation b = mock(Relation.class); @@ -101,7 +101,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOaf() { + void shouldThrowForOafEntityAndOaf() { // given OafEntity a = mock(OafEntity.class); Oaf b = mock(Oaf.class); @@ -114,7 +114,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -127,7 +127,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { + void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { // given class OafEntitySub1 extends OafEntity { } @@ -145,7 +145,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForOafEntityAndOafEntity() { + void shouldBehaveProperlyForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); OafEntity b = mock(OafEntity.class); @@ -165,7 +165,7 @@ public class MergeAndGetTest { class SelectNewerAndGetStrategy { @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -178,7 +178,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -191,7 +191,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndResult() { + void shouldThrowForOafEntityAndResult() { // given OafEntity a = mock(OafEntity.class); Result b = mock(Result.class); @@ -204,7 +204,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { + void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { // given // real types must be used because subclass-superclass resolution does not work for // mocks @@ -221,7 +221,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnLeftForOafEntityAndOafEntity() { + void shouldShouldReturnLeftForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(1L); @@ -238,7 +238,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnRightForOafEntityAndOafEntity() { + void shouldShouldReturnRightForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(2L); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index 79ab55e072..99ce961aa8 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest { class Main { @Test - public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { + void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { // given Class rowClazz = Relation.class; Class actionPayloadClazz = OafEntity.class; @@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest { @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") - public void shouldPromoteActionPayloadForGraphTable( + void shouldPromoteActionPayloadForGraphTable( MergeAndGet.Strategy strategy, Class rowClazz, Class actionPayloadClazz) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 477e4b204a..cbc1bfaba7 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest { class JoinTableWithActionPayloadAndMerge { @Test - public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { + void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { // given class OafImpl extends Oaf { } @@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { + void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { // given String id0 = "id0"; String id1 = "id1"; @@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { + void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { // given String id0 = "id0"; String id1 = "id1"; @@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest { class GroupTableByIdAndMerge { @Test - public void shouldRunProperly() { + void shouldRunProperly() { // given String id1 = "id1"; String id2 = "id2"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java index 4b9fd33f45..a48b84a333 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class CollectAndSave implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); + Objects + .requireNonNull( + CollectAndSave.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index cea8c28914..f178451c15 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable { private static void prepareResults(SparkSession spark, String inputPath, String outputPath, String bipScorePath, Class inputClazz) { - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD bipDeserializeJavaRDD = sc .textFile(bipScorePath) @@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - System.out.println(bipScores.count()); - Dataset results = readPath(spark, inputPath, inputClazz); results.createOrReplaceTempView("result"); @@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable { ret.setId(value._2().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) value -> value.getId(), Encoders.STRING()) + .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = new Result(); ret.setDataInfo(getDataInfo()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 9e852eb777..40cf5ee532 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -171,26 +171,23 @@ public class PrepareProgramme { } private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { - if (!a.getLanguage().equals("en")) { - if (b.getLanguage().equalsIgnoreCase("en")) { - a.setTitle(b.getTitle()); - a.setLanguage(b.getLanguage()); - } + if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) { + a.setTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); } - if (StringUtils.isEmpty(a.getShortTitle())) { - if (!StringUtils.isEmpty(b.getShortTitle())) { - a.setShortTitle(b.getShortTitle()); - } + if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) { + a.setShortTitle(b.getShortTitle()); } return a; } + @SuppressWarnings("unchecked") private static List prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map( value -> new Tuple2<>(value.getCode(), - new Tuple2(value.getTitle(), value.getShortTitle()))) + new Tuple2<>(value.getTitle(), value.getShortTitle()))) .collect() .toArray(); @@ -216,7 +213,7 @@ public class PrepareProgramme { String[] tmp = ent.split("\\."); if (tmp.length <= 2) { if (StringUtils.isEmpty(entry._2()._2())) { - map.put(entry._1(), new Tuple2(entry._2()._1(), entry._2()._1())); + map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1())); } else { map.put(entry._1(), entry._2()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index cecd537ba0..b1a381415c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -29,7 +29,7 @@ import scala.Tuple2; */ public class PrepareProjects { - private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 2bba9fb60c..2cc20cb156 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient; */ public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + + private static final String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; + + private final DbClient dbClient; private final Configuration conf; private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { - final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + final Consumer consumer = rs -> producer.apply(rs).forEach(this::writeProject); dbClient.processResults(sql, consumer); } @@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable { public ReadProjectsFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index fdc12c6629..a4a0bf6a4a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Programme; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -47,13 +48,10 @@ import scala.Tuple2; * * To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more * than one programme. - * - * */ public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -137,7 +135,6 @@ public class SparkAtomicActionJob { h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); - // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); return pp; @@ -152,20 +149,16 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); - if (op.isPresent()) { - rp.setH2020topicdescription(op.get().getTitle()); - } + op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); return rp; }, Encoders.bean(Project.class)) .filter(Objects::nonNull) .groupByKey( - (MapFunction) p -> p.getId(), + (MapFunction) OafEntity::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (s, it) -> { Project first = it.next(); - it.forEachRemaining(p -> { - first.mergeFrom(p); - }); + it.forEachRemaining(first::mergeFrom); return first; }, Encoders.bean(Project.class)) .toJavaRDD() @@ -189,12 +182,6 @@ public class SparkAtomicActionJob { h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); } -// private static void setProgramme(H2020Classification h2020Classification, String classification) { -// String[] tmp = classification.split(" \\| "); -// -// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); -// } - public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b1..e2d8c528e7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -27,12 +27,14 @@ public class CSVParser { final Set headers = parser.getHeaderMap().keySet(); Class clazz = Class.forName(classForName); for (CSVRecord csvRecord : parser.getRecords()) { - final Object cc = clazz.newInstance(); + + @SuppressWarnings("unchecked") + final R cc = (R) clazz.newInstance(); for (String header : headers) { FieldUtils.writeField(cc, header, csvRecord.get(header), true); } - ret.add((R) cc); + ret.add(cc); } return ret; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5f5b61d8b2..5ce730692d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -26,52 +26,52 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - OPCPackage pkg = OPCPackage.open(file); - XSSFWorkbook wb = new XSSFWorkbook(pkg); + try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) { - XSSFSheet sheet = wb.getSheet(sheetName); - - if (sheetName == null) { - throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); - } - - List ret = new ArrayList<>(); - - DataFormatter dataFormatter = new DataFormatter(); - Iterator rowIterator = sheet.rowIterator(); - List headers = new ArrayList<>(); - int count = 0; - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (count == 0) { - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - headers.add(dataFormatter.formatCellValue(cell)); - } - } else { - Class clazz = Class.forName(classForName); - final Object cc = clazz.newInstance(); - - for (int i = 0; i < headers.size(); i++) { - Cell cell = row.getCell(i); - FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); - - } - - EXCELTopic et = (EXCELTopic) cc; - if (StringUtils.isNotBlank(et.getRcn())) { - ret.add((R) cc); - } + XSSFSheet sheet = wb.getSheet(sheetName); + if (sheetName == null) { + throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } - count += 1; - } + List ret = new ArrayList<>(); - return ret; + DataFormatter dataFormatter = new DataFormatter(); + Iterator rowIterator = sheet.rowIterator(); + List headers = new ArrayList<>(); + int count = 0; + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + if (count == 0) { + Iterator cellIterator = row.cellIterator(); + + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + headers.add(dataFormatter.formatCellValue(cell)); + } + } else { + Class clazz = Class.forName(classForName); + final Object cc = clazz.newInstance(); + + for (int i = 0; i < headers.size(); i++) { + Cell cell = row.getCell(i); + FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); + + } + + EXCELTopic et = (EXCELTopic) cc; + if (StringUtils.isNotBlank(et.getRcn())) { + ret.add((R) cc); + } + + } + + count += 1; + } + + return ret; + } } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d1..b59bd9dbc6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -25,7 +25,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; */ public class ReadCSV implements Closeable { private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; @@ -49,17 +49,16 @@ public class ReadCSV implements Closeable { log.info("Getting CSV file..."); readCSV.execute(classForName); - } } - public void execute(final String classForName) throws Exception { + public void execute(final String classForName) + throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { CSVParser csvParser = new CSVParser(); csvParser .parse(csvFile, classForName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -72,18 +71,18 @@ public class ReadCSV implements Closeable { final String hdfsNameNode, final String fileURL) throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681cf..330779e24a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs */ public class ReadExcel implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private static final Log log = LogFactory.getLog(ReadExcel.class); + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final InputStream excelFile; @@ -51,13 +53,15 @@ public class ReadExcel implements Closeable { } } - public void execute(final String classForName, final String sheetName) throws Exception { + public void execute(final String classForName, final String sheetName) + throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException, + InstantiationException { + EXCELParser excelParser = new EXCELParser(); excelParser .parse(excelFile, classForName, sheetName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -68,20 +72,20 @@ public class ReadExcel implements Closeable { public ReadExcel( final String hdfsPath, final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + final String fileURL) throws CollectorException, IOException { + + final Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.excelFile = httpConnector.getInputSourceAsStream(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index d6c17e4151..869e1cb68d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; @@ -74,7 +75,7 @@ public class GenerateRorActionSetJob { final String jsonConfiguration = IOUtils .toString( - SparkAtomicActionJob.class + GenerateRorActionSetJob.class .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -108,7 +109,7 @@ public class GenerateRorActionSetJob { private static void processRorOrganizations(final SparkSession spark, final String inputPath, - final String outputPath) throws Exception { + final String outputPath) throws IOException { readInputPath(spark, inputPath) .map( @@ -203,7 +204,7 @@ public class GenerateRorActionSetJob { private static Dataset readInputPath( final SparkSession spark, - final String path) throws Exception { + final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java index b566a55016..a5acea5ae5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Address implements Serializable { + private static final long serialVersionUID = 2444635485253443195L; + @JsonProperty("lat") private Float lat; @@ -37,8 +39,6 @@ public class Address implements Serializable { @JsonProperty("line") private String line; - private final static long serialVersionUID = 2444635485253443195L; - public Float getLat() { return lat; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java index 3dab60a9f9..1e7621f988 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Country implements Serializable { + private static final long serialVersionUID = 4357848706229493627L; + @JsonProperty("country_code") private String countryCode; @JsonProperty("country_name") private String countryName; - private final static long serialVersionUID = 4357848706229493627L; - public String getCountryCode() { return countryCode; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java index 406bfd82c8..5ea419b4e6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java @@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable { private String preferred; - private final static long serialVersionUID = 2616688352998387611L; + private static final long serialVersionUID = 2616688352998387611L; public ExternalIdType() { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java index 3fd0c92508..a744a325f3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java @@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode; public class ExternalIdTypeDeserializer extends JsonDeserializer { @Override - public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) - throws IOException, JsonProcessingException { + public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { final ObjectCodec oc = p.getCodec(); final JsonNode node = oc.readTree(p); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java index 9616db4479..9317a777c9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java @@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable { @JsonProperty("code") private String code; - private final static long serialVersionUID = 7294958526269195673L; + private static final long serialVersionUID = 7294958526269195673L; public String getAsciiName() { return asciiName; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java index 2b0487168b..b13d64b107 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java @@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable { @JsonProperty("license") private License license; - private final static long serialVersionUID = -8389480201526252955L; + private static final long serialVersionUID = -8389480201526252955L; public NameAndCode getNutsLevel2() { return nutsLevel2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java index 61eb0339dc..9a2cb39e37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java @@ -13,7 +13,7 @@ public class Label implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = -6576156103297850809L; + private static final long serialVersionUID = -6576156103297850809L; public String getIso639() { return iso639; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java index bdc8f4c423..a0f6cf7749 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java @@ -13,7 +13,7 @@ public class License implements Serializable { @JsonProperty("license") private String license; - private final static long serialVersionUID = -194308261058176439L; + private static final long serialVersionUID = -194308261058176439L; public String getAttribution() { return attribution; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java index 61d7eb8e60..c0f5d76450 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class NameAndCode implements Serializable { + private static final long serialVersionUID = 5459836979206140843L; + @JsonProperty("name") private String name; @JsonProperty("code") private String code; - private final static long serialVersionUID = 5459836979206140843L; - public String getName() { return name; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java index 8b73db98f5..db9f96445a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Relationship implements Serializable { + private static final long serialVersionUID = 7847399503395576960L; + @JsonProperty("type") private String type; @@ -16,8 +18,6 @@ public class Relationship implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = 7847399503395576960L; - public String getType() { return type; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java index 94de34fee0..b8041cfdfe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class RorOrganization implements Serializable { + private static final long serialVersionUID = -2658312087616043225L; + @JsonProperty("ip_addresses") private List ipAddresses = new ArrayList<>(); @@ -59,8 +61,6 @@ public class RorOrganization implements Serializable { @JsonProperty("status") private String status; - private final static long serialVersionUID = -2658312087616043225L; - public List getIpAddresses() { return ipAddresses; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java index c822a67236..8e46ab92b8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java @@ -11,8 +11,6 @@ import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.utils.DHPUtils; @@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap implements C private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); - private MessageSender messageSender; + private transient MessageSender messageSender; public AggregatorReport() { } - public AggregatorReport(MessageSender messageSender) throws IOException { + public AggregatorReport(MessageSender messageSender) { this.messageSender = messageSender; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 9926f1688c..5491696736 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -22,7 +22,7 @@ public abstract class ReportingJob { protected final AggregatorReport report; - public ReportingJob(AggregatorReport report) { + protected ReportingJob(AggregatorReport report) { this.report = report; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 65e7805d89..bab44a3b1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -25,7 +25,7 @@ public class MDStoreActionNode { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK } - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; @@ -70,7 +70,7 @@ public class MDStoreActionNode { if (StringUtils.isBlank(hdfsuri)) { throw new IllegalArgumentException("missing or empty argument namenode"); } - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { @@ -94,7 +94,7 @@ public class MDStoreActionNode { break; } case ROLLBACK: { - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 23ee3e2c6d..d0872da1da 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -116,7 +116,7 @@ public class CollectorWorker extends ReportingJob { final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) - .get(); + .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index f6fdc266ed..f1f74b09e4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob { totalItems.add(1); try { SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Node node = document.selectSingleNode(xpath); final String originalIdentifier = node.getText(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a61e2032c6..8493a3436a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -32,7 +32,7 @@ public class HttpConnector2 { private String responseType = null; - private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; public HttpConnector2() { this(new HttpClientParams()); @@ -120,7 +120,7 @@ public class HttpConnector2 { if (is3xx(urlConn.getResponseCode())) { // REDIRECTS final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.info(String.format("The requested url has been moved to %s", newUrl)); + log.info("The requested url has been moved to {}", newUrl); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -140,14 +140,14 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - requestUrl + " - waiting and repeating request after suggested retry-after " - + retryAfter + " sec."); + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); backoffAndSleep(retryAfter * 1000); } else { log .warn( - requestUrl + " - waiting and repeating request after default delay of " - + getClientParams().getRetryDelay() + " sec."); + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); @@ -181,12 +181,12 @@ public class HttpConnector2 { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} - value: {}", e.getKey(), v); } } } @@ -204,7 +204,7 @@ public class HttpConnector2 { private int obtainRetryAfter(final Map> headerMap) { for (String key : headerMap.keySet()) { - if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index a273149835..549c597204 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -11,8 +11,6 @@ import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; import com.mongodb.client.MongoCollection; import eu.dnetlib.dhp.aggregation.common.AggregatorReport; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index 3199af5b73..ec5bab448e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public static final String PATH_PARAM = "path"; public static final String BODY_JSONPATH = "$.body"; - public FileSystem fileSystem; + private final FileSystem fileSystem; public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { this.fileSystem = fileSystem; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 75dd746ea9..331dee6b47 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.io.IOException; -import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -16,7 +15,6 @@ import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +28,8 @@ public class OaiIterator implements Iterator { private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); - private final static String REPORT_PREFIX = "oai:"; + private static final String REPORT_PREFIX = "oai:"; + public static final String UTF_8 = "UTF-8"; private final Queue queue = new PriorityBlockingQueue<>(); @@ -68,7 +67,7 @@ public class OaiIterator implements Iterator { try { this.token = firstPage(); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } @@ -90,7 +89,7 @@ public class OaiIterator implements Iterator { try { token = otherPages(token); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } return res; @@ -99,23 +98,24 @@ public class OaiIterator implements Iterator { @Override public void remove() { + throw new UnsupportedOperationException(); } private String firstPage() throws CollectorException { try { - String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8); if (set != null && !set.isEmpty()) { - url += "&set=" + URLEncoder.encode(set, "UTF-8"); + url += "&set=" + URLEncoder.encode(set, UTF_8); } if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); + url += "&from=" + URLEncoder.encode(fromDate, UTF_8); } if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); + url += "&until=" + URLEncoder.encode(untilDate, UTF_8); } - log.info("Start harvesting using url: " + url); + log.info("Start harvesting using url: {}", url); return downloadPage(url); } catch (final UnsupportedEncodingException e) { @@ -143,7 +143,7 @@ public class OaiIterator implements Iterator { return downloadPage( baseUrl + "?verb=ListRecords&resumptionToken=" - + URLEncoder.encode(resumptionToken, "UTF-8")); + + URLEncoder.encode(resumptionToken, UTF_8)); } catch (final UnsupportedEncodingException e) { report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 764c21fc21..67ec22b465 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -131,7 +131,9 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { - transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); xpath = XPathFactory.newInstance().newXPath(); @@ -142,7 +144,7 @@ public class RestIterator implements Iterator { private void initQueue() { query = baseUrl + "?" + queryParams + querySize + queryFormat; - log.info("REST calls starting with " + query); + log.info("REST calls starting with {}", query); } private void disconnect() { @@ -174,7 +176,7 @@ public class RestIterator implements Iterator { try { query = downloadPage(query); } catch (CollectorException e) { - log.debug("CollectorPlugin.next()-Exception: " + e); + log.debug("CollectorPlugin.next()-Exception: {}", e); throw new RuntimeException(e); } } @@ -198,7 +200,7 @@ public class RestIterator implements Iterator { // check if cursor=* is initial set otherwise add it to the queryParam URL if (resumptionType.equalsIgnoreCase("deep-cursor")) { - log.debug("check resumptionType deep-cursor and check cursor=*?" + query); + log.debug("check resumptionType deep-cursor and check cursor=*?{}", query); if (!query.contains("&cursor=")) { query += "&cursor=*"; } @@ -208,16 +210,16 @@ public class RestIterator implements Iterator { log.info("requestig URL [{}]", query); URL qUrl = new URL(query); - log.debug("authMethod :" + authMethod); + log.debug("authMethod: {}", authMethod); if ("bearer".equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestMethod("GET"); theHttpInputStream = conn.getInputStream(); } else if (BASIC.equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); @@ -237,13 +239,13 @@ public class RestIterator implements Iterator { if (!(emptyXml).equalsIgnoreCase(resultXml)) { resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); - log.debug("nodeList.length: " + nodeList.getLength()); + log.debug("nodeList.length: {}", nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { StringWriter sw = new StringWriter(); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); String toEnqueue = sw.toString(); if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { - log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); + log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml); } else { recordQueue.add(sw.toString()); } @@ -274,9 +276,9 @@ public class RestIterator implements Iterator { String[] resumptionKeyValue = arrayUrlArgStr.split("="); if (isInteger(resumptionKeyValue[1])) { urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); - log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); + log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize); } else { - log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); + log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]); } } } @@ -295,7 +297,7 @@ public class RestIterator implements Iterator { discoverResultSize += nodeList.getLength(); } } - log.info("discoverResultSize: {}", discoverResultSize); + log.info("discoverResultSize: {}", discoverResultSize); break; case "pagination": diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c7201a2674..a01703675d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -67,10 +67,10 @@ public class TransformSparkJobNode { log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); - log.info(String.format("isLookupUrl: %s", isLookupUrl)); + log.info("isLookupUrl: {}", isLookupUrl); final String dateOfTransformation = parser.get("dateOfTransformation"); - log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + log.info("dateOfTransformation: {}", dateOfTransformation); final Integer rpt = Optional .ofNullable(parser.get("recordsPerTask")) @@ -129,9 +129,9 @@ public class TransformSparkJobNode { .map((Function) x::call); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); - log.info("Transformed item " + ct.getProcessedItems().count()); - log.info("Total item " + ct.getTotalItems().count()); - log.info("Transformation Error item " + ct.getErrorItems().count()); + log.info("Transformed item {}", ct.getProcessedItems().count()); + log.info("Total item {}", ct.getTotalItems().count()); + log.info("Transformation Error item {}", ct.getErrorItems().count()); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); writeHdfsFile( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 096d0e2896..e93f3b5184 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') " + + + "where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + + private TransformationFactory() { + } public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -27,7 +33,7 @@ public class TransformationFactory { try { final String transformationPlugin = jobArgument.get("transformationPlugin"); - log.info("Transformation plugin required " + transformationPlugin); + log.info("Transformation plugin required {}", transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { final String transformationRuleId = jobArgument.get("transformationRuleId"); @@ -38,7 +44,7 @@ public class TransformationFactory { final String transformationRule = queryTransformationRuleFromIS( transformationRuleId, isLookupService); - final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); + final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, vocabularies); @@ -46,7 +52,6 @@ public class TransformationFactory { default: throw new DnetTransformationException( "transformation plugin does not exists for " + transformationPlugin); - } } catch (Throwable e) { @@ -55,9 +60,9 @@ public class TransformationFactory { } private static String queryTransformationRuleFromIS(final String transformationRuleId, - final ISLookUpService isLookUpService) throws Exception { + final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException { final String query = String.format(TRULE_XQUERY, transformationRuleId); - System.out.println("asking query to IS: " + query); + log.info("asking query to IS: {}", query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 9da0747e65..3d57b966f1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import java.io.Serializable; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java index e3d5888586..1aa549c099 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java @@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable { private static final Set particles = null; - public PersonCleaner() { - - } - private String normalize(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\s+-\\s+", "-"); - -// s = s.replaceAll("[\\W&&[^,-]]", " "); - -// System.out.println("class Person: s: " + s); - -// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replace("\\d", " "); s = s.replace("\\n", " "); @@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { s = s.replaceAll("\\s+", " "); if (s.contains(",")) { - // System.out.println("class Person: s: " + s); - String[] arr = s.split(","); if (arr.length == 1) { @@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } else if (arr.length > 1) { surname = splitTerms(arr[0]); firstname = splitTermsFirstName(arr[1]); -// System.out.println("class Person: surname: " + surname); -// System.out.println("class Person: firstname: " + firstname); - fullname.addAll(surname); fullname.addAll(firstname); } @@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini firstname = fullname.subList(0, lastInitialPosition + 1); - System.out.println("name: " + firstname); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI for (String term : fullname) { @@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } private List splitTerms(String s) { - if (particles == null) { - // particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); - } - List list = Lists.newArrayList(); for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { - // if (!particles.contains(part.toLowerCase())) { list.add(part); - - // } } return list; } @@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { public String getNormalisedFullname() { return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : Joiner.on(" ").join(fullname); - // return isAccurate() ? - // Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : - // Joiner.on(" ").join(fullname); } public List getCapitalSurname() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 43291e5de1..acf48ccc57 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.transformation.xslt; -import java.io.ByteArrayInputStream; import java.io.Serializable; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction, Serializable { - public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform"; - private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; + private static final String DATASOURCE_ID_PARAM = "varDataSourceId"; - private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; + private static final String DATASOURCE_NAME_PARAM = "varOfficialName"; private final AggregationCounter aggregationCounter; @@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index 7200d2896d..f2158748bc 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -67,7 +69,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOne() throws Exception { + void matchOne() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOneWithTwo() throws Exception { + void matchOneWithTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest { List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); String tmp_influence = tmp_ds.get(0).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); tmp_influence = tmp_ds.get(1).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); - Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); + assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); } @Test - public void matchTwo() throws Exception { + void matchTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 2); + assertEquals(2, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index da5beecf9a..dd7e1910f6 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -9,10 +9,10 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; -public class CSVParserTest { +class CSVParserTest { @Test - public void readProgrammeTest() throws Exception { + void readProgrammeTest() throws Exception { String programmecsv = IOUtils .toString( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index b7155bc3a4..6c309acb3b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -30,7 +30,7 @@ public class EXCELParserTest { } @Test - public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, + void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, IllegalAccessException, InstantiationException { EXCELParser excelParser = new EXCELParser(); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 256dc05213..f128b56105 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { PrepareProgramme .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 0db3485f5b..f0f3532aad 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -66,7 +66,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProjectTest() throws Exception { + void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 42e4946816..a77dbace48 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -69,7 +69,7 @@ public class SparkUpdateProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { SparkAtomicActionJob .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index f16901cb49..aa11f4ab5b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror; import java.io.FileInputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); final Organization org = GenerateRorActionSetJob.convertRorOrg(r); - System.out.println(mapper.writeValueAsString(org)); + final String s = mapper.writeValueAsString(org); + Assertions.assertTrue(StringUtils.isNotBlank(s)); + System.out.println(s); } @Test @@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - GenerateRorActionSetJob.convertRorOrg(r); + Organization o = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertNotNull(o); + Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index afb6ae6a12..633a473790 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + void testGenerateNativeStoreSparkJobRefresh() throws Exception { MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); @@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + void testGenerateNativeStoreSparkJobIncremental() throws Exception { MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); @@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(3) - public void testTransformSparkJob() throws Exception { + void testTransformSparkJob() throws Exception { setUpVocabulary(); @@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testJSONSerialization() throws Exception { + void testJSONSerialization() throws Exception { final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); System.out.println("s = " + s); final ObjectMapper mapper = new ObjectMapper(); @@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testGenerationMetadataRecord() throws Exception { + void testGenerationMetadataRecord() throws Exception { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); @@ -236,7 +236,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final MetadataRecord record = GenerateNativeStoreSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index efe925175c..f2b873e109 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.collection.HttpClientParams; * @author js, Andreas Czerniak * */ -public class RestCollectorPluginTest { +class RestCollectorPluginTest { private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); @@ -65,7 +65,7 @@ public class RestCollectorPluginTest { @Disabled @Test - public void test() throws CollectorException { + void test() throws CollectorException { AtomicInteger i = new AtomicInteger(0); final Stream stream = rcp.collect(api, new AggregatorReport()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java index b5ea5f0696..f52f4632ad 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.collector.worker; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.collection.ApiDescriptor; @Disabled -public class CollectorWorkerApplicationTests { +class CollectorWorkerApplicationTests { @Test - public void testCollectionOAI() throws Exception { + void testCollectionOAI() throws Exception { final ApiDescriptor api = new ApiDescriptor(); api.setId("oai"); api.setProtocol("oai"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 948a8f93be..7bd7baaea9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) -public class TransformationJobTest extends AbstractVocabularyTest { +class TransformationJobTest extends AbstractVocabularyTest { private SparkConf sparkConf; @@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Date cleaner") - public void testDateCleaner() throws Exception { + void testDateCleaner() throws Exception { DateCleaner dc = new DateCleaner(); assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("2002-09-20", dc.clean("20-09-2002")); @@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") - public void testTransformSaxonHE() throws Exception { + void testTransformSaxonHE() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") - public void testTransformITGv4Zenodo() throws Exception { + void testTransformITGv4Zenodo() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -97,7 +97,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") - public void testTransformMostlyUsedScript() throws Exception { + void testTransformMostlyUsedScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; @@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") - public void testTransformRestScript() throws Exception { + void testTransformRestScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; @@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") - public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { + void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { @@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main") - public void transformTest(@TempDir Path testDir) throws Exception { + void transformTest(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 2caa66db42..38ffd28fef 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable { private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " + "target_type, unnest(original_target_objects) as target, " + "relationship FROM blacklist WHERE status = 'ACCEPTED'"; @@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable { dbPassword)) { log.info("Processing blacklist..."); - rbl.execute(query, rbl::processBlacklistEntry); + rbl.execute(QUERY, rbl::processBlacklistEntry); } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); @@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable { return Arrays.asList(direct, inverse); - } catch (final Exception e) { + } catch (final SQLException e) { throw new RuntimeException(e); } } @@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable { public ReadBlacklistFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { @@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable { try { writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); - } catch (final Exception e) { + } catch (final IOException e) { throw new RuntimeException(e); } } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 91bcb9d1c7..38ef63d270 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob { .map((MapFunction, Relation>) c -> { Relation ir = c._1(); Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; - } + if (obl.isPresent() && ir.equals(obl.get())) { + return null; } return ir; }, Encoders.bean(Relation.class)) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 585848589c..058ea271c1 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -62,7 +62,7 @@ public class BlackListTest { } @Test - public void noRemoveTest() throws Exception { + void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -89,7 +89,7 @@ public class BlackListTest { } @Test - public void removeNoMergeMatchTest() throws Exception { + void removeNoMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -128,7 +128,7 @@ public class BlackListTest { } @Test - public void removeMergeMatchTest() throws Exception { + void removeMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 429eb7d11b..584438d44f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; +import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public class EventFactory { - private final static String PRODUCER_ID = "OpenAIRE"; + private static final String PRODUCER_ID = "OpenAIRE"; - private final static String[] DATE_PATTERNS = { + private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" }; + private EventFactory() { + } + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final Event res = new Event(); @@ -61,7 +66,7 @@ public class EventFactory { map.setTargetResultId(target.getOpenaireId()); final List titles = target.getTitles(); - if (titles.size() > 0) { + if (!titles.isEmpty()) { map.setTargetResultTitle(titles.get(0)); } @@ -70,8 +75,12 @@ public class EventFactory { map.setTargetDateofacceptance(date); } - map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); - map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); + map + .setTargetSubjects( + target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList())); + map + .setTargetAuthors( + target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList())); // PROVENANCE INFO map.setTrust(updateInfo.getTrust()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java index 89fc2e703e..f0aa6491ff 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java @@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.TypedColumn; import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; @@ -88,8 +84,7 @@ class CountAggregator extends Aggregator, Tuple2 merge(final Tuple2 arg0, final Tuple2 arg1) { - final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); - return new Tuple2<>(s, arg0._2 + arg1._2); + return doMerge(arg0, arg1); } @Override @@ -99,6 +94,10 @@ class CountAggregator extends Aggregator, Tuple2 reduce(final Tuple2 arg0, final Tuple2 arg1) { + return doMerge(arg0, arg1); + } + + private Tuple2 doMerge(final Tuple2 arg0, final Tuple2 arg1) { final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return new Tuple2<>(s, arg0._2 + arg1._2); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java index 05ff2aa38d..0fbc763e0f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa; +import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -98,7 +99,6 @@ public class IndexEventSubsetJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -114,11 +114,11 @@ public class IndexEventSubsetJob { log.info("*** Deleting old events"); final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000); - log.info("*** Deleted events: " + message); + log.info("*** Deleted events: {}", message); } - private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce7..e8ef5dd3e9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -33,11 +33,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; -import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.model.*; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -89,9 +85,9 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: " + subscriptions.size()); + log.info("Number of subscriptions: {}", subscriptions.size()); - if (subscriptions.size() > 0) { + if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils @@ -106,7 +102,6 @@ public class IndexNotificationsJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -122,7 +117,7 @@ public class IndexNotificationsJob { log.info("*** Deleting old notifications"); final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); + log.info("*** Deleted notifications: {}", message); log.info("*** sendNotifications (emails, ...)"); sendNotifications(brokerApiBaseUrl, startTime - 1000); @@ -174,28 +169,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && !conditions + if (conditions.containsKey("targetDateofacceptance") && conditions .get("targetDateofacceptance") .stream() - .anyMatch( + .noneMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && !conditions + && conditions .get("targetResultTitle") .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && !conditions + && conditions .get("targetAuthors") .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -207,7 +202,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -222,7 +217,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java index 380a689e4d..0c74d8a6d7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java @@ -7,6 +7,7 @@ import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; @@ -61,7 +62,7 @@ public class IndexOnESJob { final JavaRDD inputRdd = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map(IndexOnESJob::eventAsJsonString, Encoders.STRING()) + .map((MapFunction) IndexOnESJob::eventAsJsonString, Encoders.STRING()) .javaRDD(); final Map esCfg = new HashMap<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index e061c0d3b7..b5c891bb88 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -77,9 +77,8 @@ public class PartitionEventsByDsIdJob { } log.info("validOpendoarIds: {}", validOpendoarIds); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - - ClusterUtils + runWithSparkSession( + conf, isSparkSessionManaged, spark -> ClusterUtils .readPath(spark, eventsPath, Event.class) .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) .filter((FilterFunction) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) @@ -92,9 +91,7 @@ public class PartitionEventsByDsIdJob { .partitionBy("group") .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(partitionPath); - - }); + .json(partitionPath)); renameSubDirs(partitionPath); } @@ -102,14 +99,14 @@ public class PartitionEventsByDsIdJob { private static void renameSubDirs(final String path) throws IOException { final FileSystem fs = FileSystem.get(new Configuration()); - log.info("** Renaming subdirs of " + path); + log.info("** Renaming subdirs of {}", path); for (final FileStatus fileStatus : fs.listStatus(new Path(path))) { if (fileStatus.isDirectory()) { final Path oldPath = fileStatus.getPath(); final String oldName = oldPath.getName(); if (oldName.contains("=")) { final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, "=")); - log.info(" * " + oldPath.getName() + " -> " + newPath.getName()); + log.info(" * {} -> {}", oldPath.getName(), newPath.getName()); fs.rename(oldPath, newPath); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java index 61ab5e250e..2a247b7aa8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java @@ -105,7 +105,7 @@ public class PrepareRelatedDatasourcesJob { .filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getId())) .filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference()) .map( - (MapFunction) r -> DatasourceRelationsAccumulator.calculateTuples(r), + (MapFunction) DatasourceRelationsAccumulator::calculateTuples, Encoders.bean(DatasourceRelationsAccumulator.class)) .flatMap( (FlatMapFunction>) acc -> acc diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index fba82aa8cc..87fed7db73 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -26,7 +26,7 @@ public abstract class UpdateMatcher { private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public UpdateMatcher(final int maxNumber, final Function topicFunction, + protected UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { this.maxNumber = maxNumber; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index 2f73a24486..88ad481781 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { - public AbstractEnrichMissingDataset(final Topic topic) { + protected AbstractEnrichMissingDataset(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedDataset::getOpenaireId); } protected abstract boolean filterByType(String relType); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index ab2735f2a8..440602772f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 85086a6df8..2e523da2fc 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -18,7 +18,7 @@ public class EnrichMoreProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override @@ -32,7 +32,7 @@ public class EnrichMoreProject extends UpdateMatcher { final Set existingProjects = target .getProjects() .stream() - .map(p -> p.getOpenaireId()) + .map(OaBrokerProject::getOpenaireId) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 7ba3e5e029..a709eea300 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { - public AbstractEnrichMissingPublication(final Topic topic) { + protected AbstractEnrichMissingPublication(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedPublication::getOpenaireId); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index a638024bc0..a756660273 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -16,7 +16,7 @@ public class EnrichMissingSoftware super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a6cd343590..ec340b42fc 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -18,7 +18,7 @@ public class EnrichMoreSoftware extends UpdateMatcher { super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index e834d1dde6..125eac862f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -20,7 +20,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher { super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), - aut -> aut.getOrcid()); + OaBrokerAuthor::getOrcid); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 4e40038908..f32cec90db 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -23,7 +23,7 @@ public class EnrichMissingPid extends UpdateMatcher { protected List findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { - if (target.getPids().size() > 0) { + if (!target.getPids().isEmpty()) { return Arrays.asList(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index cb3ea54645..f07bbd52f0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -35,7 +35,7 @@ public class EnrichMissingSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMissingSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMissingSubject extends UpdateMatcher { final Set existingSubject = target .getSubjects() .stream() - .map(s -> subjectAsString(s)) + .map(EnrichMissingSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 46f6fa80ca..585531095d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -33,7 +33,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { .getInstances() .stream() .filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS)) - .map(i -> i.getUrl()) + .map(OaBrokerInstance::getUrl) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 609437b9d0..a98b96b99b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -18,7 +18,7 @@ public class EnrichMorePid extends UpdateMatcher { super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), - pid -> pidAsString(pid)); + EnrichMorePid::pidAsString); } @Override @@ -32,7 +32,7 @@ public class EnrichMorePid extends UpdateMatcher { final Set existingPids = target .getPids() .stream() - .map(pid -> pidAsString(pid)) + .map(EnrichMorePid::pidAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 1f6edf96e7..b62b509c74 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -35,7 +35,7 @@ public class EnrichMoreSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMoreSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMoreSubject extends UpdateMatcher { final Set existingSubjects = target .getSubjects() .stream() - .map(pid -> subjectAsString(pid)) + .map(EnrichMoreSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 2055a014e0..790ca4e617 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -12,6 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class BrokerConstants { + private BrokerConstants() { + } + public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index 7c4ca1d22f..2e9c039904 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -24,6 +24,9 @@ public class ClusterUtils { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private ClusterUtils() { + } + public static void createDirIfMissing(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 6f0a522446..bc37203d3c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -13,8 +14,6 @@ import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Function; - import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; @@ -46,6 +45,9 @@ public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); + private ConversionUtils() { + } + public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); @@ -69,7 +71,7 @@ public class ConversionUtils { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } - public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } @@ -100,7 +102,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { + public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } @@ -142,12 +144,12 @@ public class ConversionUtils { final String pids = author.getPid() != null ? author .getPid() .stream() - .filter(pid -> pid != null) + .filter(Objects::nonNull) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) - .map(pid -> pid.getValue()) - .map(pid -> cleanOrcid(pid)) + .map(StructuredProperty::getValue) + .map(ConversionUtils::cleanOrcid) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; @@ -187,7 +189,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { + public static OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } @@ -206,14 +208,14 @@ public class ConversionUtils { res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { - log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); + log.error("Error in record {}: invalid fundingtree: {}", p.getId(), ftree); } } return res; } - public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { + public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } @@ -228,7 +230,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { + public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } @@ -241,7 +243,7 @@ public class ConversionUtils { } private static String first(final List list) { - return list != null && list.size() > 0 ? list.get(0) : null; + return list != null && !list.isEmpty() ? list.get(0) : null; } private static String kvValue(final KeyValue kv) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java index c693be93c5..658a42ac18 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple3; @@ -39,7 +41,7 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set collectedFromSet = r .getCollectedfrom() .stream() - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .collect(Collectors.toSet()); @@ -47,10 +49,10 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set hostedBySet = r .getInstance() .stream() - .map(i -> i.getHostedby()) + .map(Instance::getHostedby) .filter(Objects::nonNull) .filter(kv -> !StringUtils.equalsIgnoreCase(kv.getValue(), "Unknown Repository")) - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .filter(id -> !collectedFromSet.contains(id)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 103751f954..b2214e07e0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -41,8 +41,6 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; public class EventFinder { - private static final Logger log = LoggerFactory.getLogger(EventFinder.class); - private static final List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); @@ -72,6 +70,9 @@ public class EventFinder { matchers.add(new EnrichMissingDatasetIsSupplementedBy()); } + private EventFinder() { + } + public static EventGroup generateEvents(final ResultGroup results, final Set dsIdWhitelist, final Set dsIdBlacklist, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index adb1c753b1..cf35621934 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -12,6 +12,9 @@ public class SubscriptionUtils { private static final long ONE_DAY = 86_400_000; + private SubscriptionUtils() { + } + public static boolean verifyListSimilar(final List list, final String value) { return list.stream().anyMatch(s -> verifySimilar(s, value)); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 72fe1b204e..a49801f32c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -30,7 +30,9 @@ public class TrustUtils { } catch (final IOException e) { log.error("Error loading dedupConfig, e"); } + } + private TrustUtils() { } protected static float calculateTrust(final OaBrokerMainEntity r1, final OaBrokerMainEntity r2) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 5a9cb5e093..d29414e527 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -88,14 +88,14 @@ public final class UpdateInfo { .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getName()) + .map(OaBrokerRelatedDatasource::getName) .findFirst() .orElse(""); final String provType = getSource() .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getType()) + .map(OaBrokerRelatedDatasource::getType) .findFirst() .orElse(""); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 8fa95abe5f..45bfc785f1 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -72,7 +73,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test @@ -127,7 +128,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 77a19af4c1..550ded9f48 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -32,7 +33,7 @@ class EnrichMissingPublicationDateTest { final OaBrokerMainEntity target = new OaBrokerMainEntity(); source.setPublicationdate("2018"); final List list = matcher.findDifferences(source, target); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 974baa28b4..a8bc03e317 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,67 +9,67 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; @Test - public void rescaleTest_1() { + void rescaleTest_1() { verifyValue(-0.3, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_2() { + void rescaleTest_2() { verifyValue(0.0, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_3() { + void rescaleTest_3() { verifyValue(0.5, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_4() { + void rescaleTest_4() { verifyValue(0.95, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_5() { + void rescaleTest_5() { verifyValue(0.96, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_6() { + void rescaleTest_6() { verifyValue(0.97, 0.3f); } @Test - public void rescaleTest_7() { + void rescaleTest_7() { verifyValue(0.98, 0.45f); } @Test - public void rescaleTest_8() { + void rescaleTest_8() { verifyValue(0.99, 0.6f); } @Test - public void rescaleTest_9() { + void rescaleTest_9() { verifyValue(1.00, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_10() { + void rescaleTest_10() { verifyValue(1.01, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_11() { + void rescaleTest_11() { verifyValue(2.00, BrokerConstants.MAX_TRUST); } @Test - public void test() throws Exception { + void test() { final OaBrokerMainEntity r1 = new OaBrokerMainEntity(); r1.getTitles().add("D-NET Service Package: Data Import"); r1.getPids().add(new OaBrokerTypedValue("doi", "123")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 647a1b9c89..6a9b21b003 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -20,6 +20,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,23 +44,26 @@ abstract class AbstractSparkAction implements Serializable { protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public ArgumentApplicationParser parser; // parameters for the spark action - public SparkSession spark; // the spark session + public final ArgumentApplicationParser parser; // parameters for the spark action + public final SparkSession spark; // the spark session - public AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { + protected AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { this.parser = parser; this.spark = spark; } public List getConfigurations(ISLookUpService isLookUpService, String orchestrator) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); @@ -93,7 +97,7 @@ abstract class AbstractSparkAction implements Serializable { } abstract void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException; + throws DocumentException, IOException, ISLookUpException, SAXException; protected static SparkSession getSparkSession(SparkConf conf) { return SparkSession.builder().config(conf).getOrCreate(); @@ -139,9 +143,7 @@ abstract class AbstractSparkAction implements Serializable { c -> c .stream() .filter(Objects::nonNull) - .filter(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())) - .findFirst() - .isPresent()) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) .orElse(false); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java index 558c7c440c..9d767c4d22 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java @@ -23,6 +23,9 @@ public class DatePicker { private static final int YEAR_LB = 1300; private static final int YEAR_UB = Year.now().getValue() + 5; + private DatePicker() { + } + public static Field pick(final Collection dateofacceptance) { final Map frequencies = dateofacceptance @@ -61,7 +64,7 @@ public class DatePicker { .entrySet() .stream() .filter(e -> e.getValue() >= acceptThreshold) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); // cannot find strong majority diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 5ba6f6e6d5..d65853aff3 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -10,14 +10,11 @@ import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -25,11 +22,12 @@ import scala.Tuple2; public class DedupRecordFactory { - private static final Logger log = LoggerFactory.getLogger(DedupRecordFactory.class); - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private DedupRecordFactory() { + } + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, @@ -67,7 +65,7 @@ public class DedupRecordFactory { value._1()._1(), value._2()._2()), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) .groupByKey( - (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) + (MapFunction, String>) Tuple2::_1, Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, values) -> entityMerger(key, values, ts, dataInfo, clazz), @@ -91,7 +89,7 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 5806e9fa45..d79d24653c 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -10,6 +10,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Sets; @@ -25,6 +26,9 @@ public class DedupUtility { public static final String OPENORGS_ID_PREFIX = "openorgs____"; public static final String CORDA_ID_PREFIX = "corda"; + private DedupUtility() { + } + public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -92,14 +96,16 @@ public class DedupUtility { } public static List getConfigurations(String isLookUpUrl, String orchestrator) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); final List configurations = new ArrayList<>(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 5506b54707..2cdc221538 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -33,9 +34,11 @@ public class DispatchEntitiesSparkJob { String jsonConfiguration = IOUtils .toString( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")); + Objects + .requireNonNull( + DispatchEntitiesSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcfc..a19f863808 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -42,7 +42,7 @@ public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private final static String ID_JPATH = "$.id"; + private static final String ID_JPATH = "$.id"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -92,7 +92,7 @@ public class GroupEntitiesSparkJob { spark .read() .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) s -> parseOaf(s), Encoders.kryo(OafEntity.class)) + .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .agg(aggregator) @@ -188,7 +188,7 @@ public class GroupEntitiesSparkJob { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index dd9b167907..81cd30f88f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -15,13 +15,13 @@ public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) public static String generate(List> pids, String defaultID) { - if (pids == null || pids.size() == 0) + if (pids == null || pids.isEmpty()) return defaultID; Identifier bp = pids .stream() .min(Identifier::compareTo) - .get(); + .orElseThrow(() -> new IllegalStateException("unable to generate id")); String prefix = substringBefore(bp.getOriginalID(), "|"); String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java index 1e13485e5a..c9c9dd8fee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java @@ -9,7 +9,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -63,7 +63,7 @@ public class SparkBlockStats extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 0aaa1e6622..93027e99ab 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -6,7 +6,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,8 +16,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -78,7 +75,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction { saveParquet(rawRels, outputPath, SaveMode.Append); - log.info("Copied " + rawRels.count() + " Similarity Relations"); + log.info("Copied {} Similarity Relations", rawRels.count()); } private boolean filterOpenorgsRels(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9ece438912..bf0b7f6878 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -2,34 +2,21 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.Dataset; -import org.dom4j.DocumentException; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java index b41507e950..6989ec54bf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.EntityType; @@ -54,7 +55,7 @@ public class SparkCreateDedupRecord extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String isLookUpUrl = parser.get("isLookUpUrl"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index bfc6050392..95e3dff281 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -24,6 +24,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; @@ -76,7 +77,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String workingPath = parser.get("workingPath"); @@ -161,11 +162,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private ConnectedComponent generateID(String key, Iterator> values) { - List> identifiers = Lists.newArrayList(values).stream().map(v -> { - T entity = v._2(); - Identifier identifier = Identifier.newInstance(entity); - return identifier; - }).collect(Collectors.toList()); + List> identifiers = Lists + .newArrayList(values) + .stream() + .map(v -> Identifier.newInstance(v._2())) + .collect(Collectors.toList()); String rootID = IdGenerator.generate(identifiers, key); @@ -235,7 +236,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { info.setProvenanceaction(provenanceAction); // TODO calculate the trust value based on the similarity score of the elements in the CC - // info.setTrust(); r.setDataInfo(info); return r; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index df3db7add9..8e5e9fd698 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -18,9 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 8849673649..f89f634b59 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -26,8 +26,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.FieldListImpl; -import eu.dnetlib.pace.model.FieldValueImpl; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; @@ -56,7 +54,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); @@ -110,9 +108,6 @@ public class SparkCreateSimRels extends AbstractSparkAction { Encoders.bean(Relation.class)); saveParquet(simRels, outputPath, SaveMode.Overwrite); - - log.info("Generated " + simRels.count() + " Similarity Relations"); - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java index 657d5a832b..d12048b028 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java @@ -6,10 +6,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; @@ -23,9 +19,9 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -84,8 +80,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); + final String organizazion = ModelSupport.getMainType(EntityType.organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organizazion); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organizazion); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath); @@ -115,7 +112,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(r -> filterRels(r, "organization")) + .filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization))) // take the worst id of the diffrel: .mapToPair(rel -> { if (DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) > 0) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index 08b39793e7..61325ab502 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -32,6 +33,7 @@ import scala.Tuple3; public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class); + public static final String GROUP_PREFIX = "group::"; public SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) { super(parser, spark); @@ -41,7 +43,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser.parseArgument(args); @@ -81,8 +83,9 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset relations = createRelations(spark, mergeRelPath, relationPath, entityPath); @@ -168,7 +171,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .map(g -> Lists.newArrayList(g._2())) .filter(l -> l.size() > 1) .flatMap(l -> { - String groupId = "group::" + UUID.randomUUID(); + String groupId = GROUP_PREFIX + UUID.randomUUID(); List ids = sortIds(l); // sort IDs by type List, String>> rels = new ArrayList<>(); String source = ids.get(0); @@ -192,7 +195,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .collect(Collectors.toList()))) // : take only relations with only the group_id, it // means they are correct. If the diffRel is present the relation has to be removed - .filter(g -> g._2().size() == 1 && g._2().get(0).contains("group::")) + .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX)) .map( t -> new Tuple3<>( t._1().split("@@@")[0], @@ -255,7 +258,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { // Sort IDs basing on the type. Priority: 1) openorgs, 2)corda, 3)alphabetic public static List sortIds(List ids) { - ids.sort((o1, o2) -> DedupUtility.compareOpenOrgIds(o1, o2)); + ids.sort(DedupUtility::compareOpenOrgIds); return ids; } @@ -289,9 +292,10 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List> rels = new ArrayList<>(); for (String id1 : g._2()) { for (String id2 : g._2()) { - if (!id1.equals(id2)) - if (id1.contains(DedupUtility.OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh")) - rels.add(new Tuple2<>(id1, id2)); + if (!id1.equals(id2) && id1.contains(DedupUtility.OPENORGS_ID_PREFIX) + && !id2.contains("openorgsmesh")) { + rels.add(new Tuple2<>(id1, id2)); + } } } return rels.iterator(); @@ -310,7 +314,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1(), + GROUP_PREFIX + r._1()._1(), structuredPropertyListToString(r._2()._2().getPid()), parseECField(r._2()._2().getEclegalbody()), parseECField(r._2()._2().getEclegalperson()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 220b0f4834..0fa41bd6dc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -40,7 +40,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPropagateRelation.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); @@ -113,7 +113,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { .join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), Encoders.STRING()) .agg(new RelationAggregator().toColumn()) - .map((MapFunction, Relation>) t -> t._2(), Encoders.bean(Relation.class)); + .map((MapFunction, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); } // redirect the relations to the dedupID @@ -163,7 +163,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { private FilterFunction getRelationFilterFunction() { return r -> StringUtils.isNotBlank(r.getSource()) || StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelClass()) || + StringUtils.isNotBlank(r.getRelType()) || StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getRelClass()); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index fdef7f77d4..49021ab58b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -13,7 +14,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -26,8 +26,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -72,83 +74,76 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // for each entity - ModelSupport.entityTypes - .forEach( - (type, clazz) -> { - final String outputPath = dedupGraphPath + "/" + type; - removeOutputDir(spark, outputPath); - final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); - if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + final EntityType type = e.getKey(); + final Class clazz = e.getValue(); + final String outputPath = dedupGraphPath + "/" + type; + removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - if (mergeRelExists(workingPath, type.toString())) { + if (mergeRelExists(workingPath, type.toString())) { - final String mergeRelPath = DedupUtility - .createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + .createDedupRecordPath(workingPath, "*", type.toString()); - final Dataset rel = spark - .read() - .load(mergeRelPath) - .as(Encoders.bean(Relation.class)); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + .map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); - - } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); - } - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); + } + } } - public boolean mergeRelExists(String basePath, String entity) { + public boolean mergeRelExists(String basePath, String entity) throws IOException { boolean result = false; - try { - FileSystem fileSystem = FileSystem.get(new Configuration()); - FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); + FileSystem fileSystem = FileSystem.get(new Configuration()); + FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); - for (FileStatus fs : fileStatuses) { - if (fs.isDirectory()) - if (fileSystem - .exists( - new Path(DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)))) - result = true; + for (FileStatus fs : fileStatuses) { + final Path mergeRelPath = new Path( + DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)); + if (fs.isDirectory() && fileSystem.exists(mergeRelPath)) { + result = true; } - - return result; - } catch (IOException e) { - throw new RuntimeException(e); } + + return result; } private static String updateDeletedByInference( diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java index 3a986a9dd8..3e564052e6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java @@ -3,29 +3,15 @@ package eu.dnetlib.dhp.oa.dedup.graph; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; -import org.apache.spark.api.java.function.MapFunction; import org.codehaus.jackson.annotate.JsonIgnore; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.oa.dedup.IdGenerator; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.PaceException; public class ConnectedComponent implements Serializable { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index e821d7ef54..a25a853ef6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; -public class Identifier implements Serializable, Comparable { +public class Identifier implements Serializable, Comparable> { public static final String DATE_FORMAT = "yyyy-MM-dd"; public static final String BASE_DATE = "2000-01-01"; @@ -29,8 +29,8 @@ public class Identifier implements Serializable, Comparable // cached date value private Date date = null; - public static Identifier newInstance(T entity) { - return new Identifier(entity); + public static Identifier newInstance(T entity) { + return new Identifier<>(entity); } public Identifier(T entity) { @@ -88,7 +88,7 @@ public class Identifier implements Serializable, Comparable } @Override - public int compareTo(Identifier i) { + public int compareTo(Identifier i) { // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) // alphabetical order of the originalID diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java index 7c58c375ac..daea29a070 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java @@ -10,12 +10,12 @@ import org.junit.jupiter.api.Test; import com.clearspring.analytics.util.Lists; -public class DatePickerTest { +class DatePickerTest { Collection dates = Lists.newArrayList(); @Test - public void testPickISO() { + void testPickISO() { dates.add("2016-01-01T12:00:00Z"); dates.add("2016-06-16T12:00:00Z"); dates.add("2020-01-01T12:00:00Z"); @@ -24,7 +24,7 @@ public class DatePickerTest { } @Test - public void testPickSimple() { + void testPickSimple() { dates.add("2016-01-01"); dates.add("2016-06-16"); dates.add("2020-01-01"); @@ -33,7 +33,7 @@ public class DatePickerTest { } @Test - public void testPickFrequent() { + void testPickFrequent() { dates.add("2016-02-01"); dates.add("2016-02-01"); dates.add("2016-02-01"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 80154fbb7c..e86f91f996 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class EntityMergerTest implements Serializable { +class EntityMergerTest implements Serializable { private List> publications; private List> publications2; @@ -54,7 +54,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +69,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -125,7 +125,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -137,7 +137,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -147,7 +147,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -157,7 +157,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 1a279fac76..2d66378828 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -56,7 +56,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest1() { + void generateIdTest1() { String id1 = IdGenerator.generate(bestIds, "50|defaultID"); System.out @@ -66,7 +66,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest2() { + void generateIdTest2() { String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); @@ -82,7 +82,7 @@ public class IdGeneratorTest { } @Test - public void generateIdOrganizationTest() { + void generateIdOrganizationTest() { String id1 = IdGenerator.generate(bestIdsOrg, "20|defaultID"); assertEquals("20|openorgs____::599c15a70fcb03be6ba08f75f14d6076", id1); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bf4913056d..1860a3ff0e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -148,7 +147,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -203,7 +202,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(2) - public void cutMergeRelsTest() throws Exception { + void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -299,7 +298,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -355,7 +354,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(4) - public void createDedupRecordTest() throws Exception { + void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -402,7 +401,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(5) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -518,7 +517,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(6) - public void propagateRelationTest() throws Exception { + void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -568,7 +567,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(7) - public void testRelations() throws Exception { + void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 97cfab1187..9312d83b10 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -67,7 +67,7 @@ public class SparkOpenorgsDedupTest implements Serializable { public static void cleanUp() throws IOException, URISyntaxException { testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) + .get(SparkOpenorgsDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) .toFile() .getAbsolutePath(); testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") @@ -101,7 +101,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); @@ -110,14 +110,14 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); } @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -148,7 +148,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(2) - public void copyOpenorgsSimRels() throws Exception { + void copyOpenorgsSimRels() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -177,7 +177,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -230,11 +230,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(4) - public void prepareOrgRelsTest() throws Exception { + void prepareOrgRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser @@ -313,11 +313,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(5) - public void prepareNewOrgsTest() throws Exception { + void prepareNewOrgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareNewOrgs.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); parser diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 606dd9e5bc..2349ffebe8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.lenient; import java.io.File; @@ -12,8 +11,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.Collections; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -32,9 +29,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -110,7 +104,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(1) - public void copyOpenorgsMergeRelTest() throws Exception { + void copyOpenorgsMergeRelTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -143,7 +137,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(2) - public void createOrgsDedupRecordTest() throws Exception { + void createOrgsDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -176,7 +170,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(3) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -216,7 +210,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(4) - public void copyRelationsNoOpenorgsTest() throws Exception { + void copyRelationsNoOpenorgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -239,7 +233,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(5) - public void propagateRelationsTest() throws Exception { + void propagateRelationsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 31de8d951a..2efbe260af 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -124,7 +124,7 @@ public class SparkStatsTest implements Serializable { } @Test - public void createBlockStatsTest() throws Exception { + void createBlockStatsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 1759180d27..7348a3bd20 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.dedup.jpath; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; -public class JsonPathTest { +class JsonPathTest { String json = "{\t\"dataInfo\":{\t\t\"invisible\":false,\t\t\"inferred\":false,\t\t\"deletedbyinference\":false,\t\t\"trust\":\"0.810000002384185791\",\t\t\"inferenceprovenance\":\"\",\t\t\"provenanceaction\":{\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t}\t},\t\"lastupdatetimestamp\":1584960968152,\t\"id\":\"20|corda__h2020::9faf23721249f26ac2c16eb857ea1fb9\",\t\"originalId\":[\t\t\"corda__h2020::927957582\"\t],\t\"collectedfrom\":[\t\t{\t\t\t\"key\":\"openaire____::corda_h2020\",\t\t\t\"value\":\"CORDA - COmmon Research DAta Warehouse - Horizon 2020\",\t\t\t\"dataInfo\":null\t\t}\t],\t\"pid\":[\t],\t\"dateofcollection\":\"2016-06-05\",\t\"dateoftransformation\":\"2019-11-19\",\t\"extraInfo\":[\t],\t\"oaiprovenance\":null,\t\"legalshortname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"legalname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"alternativeNames\":[\t],\t\"websiteurl\":{\t\t\"value\":\"http://www.comentor.se\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"logourl\":null,\t\"eclegalbody\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"eclegalperson\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnonprofit\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecresearchorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"echighereducation\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganizationeurinterests\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecenterprise\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecsmevalidated\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnutscode\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"country\":null}"; DedupConfig conf = DedupConfig @@ -283,15 +285,18 @@ public class JsonPathTest { + "}"); @Test - public void testJPath() throws Exception { + void testJPath() { MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(conf, json); + Assertions.assertNotNull(d); + Assertions.assertTrue(StringUtils.isNotBlank(d.getIdentifier())); + System.out.println("d = " + d); } @Test - public void testNull() throws Exception { + void testNull() { final Object p = null; System.out.println((String) p); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java index ee6136b588..c6c2077278 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref; import java.io.ByteArrayOutputStream; +import java.util.Objects; import java.util.Optional; import java.util.zip.Inflater; @@ -22,9 +23,11 @@ public class CrossrefImporter { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - CrossrefImporter.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/import_from_es.json"))); + Objects + .requireNonNull( + CrossrefImporter.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/import_from_es.json")))); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java index dcebbbcac8..6d6a4ca785 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref; +import java.io.IOException; import java.util.Iterator; import java.util.List; @@ -11,8 +12,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; @@ -57,8 +56,8 @@ public class ESClient implements Iterator { try (CloseableHttpResponse response = client.execute(httpPost)) { return IOUtils.toString(response.getEntity().getContent()); } - } catch (Throwable e) { - throw new RuntimeException("Error on executing request ", e); + } catch (IOException e) { + throw new IllegalStateException("Error on executing request ", e); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index feb540fcd4..f725b3222d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -29,8 +29,10 @@ public class ActivitiesDecompressor { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; - public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) - throws Exception { + private ActivitiesDecompressor() { + } + + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws IOException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); @@ -44,7 +46,7 @@ public class ActivitiesDecompressor { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,8 +54,7 @@ public class ActivitiesDecompressor { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int doiFound = 0; int errorFromOrcidFound = 0; @@ -79,11 +80,11 @@ public class ActivitiesDecompressor { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes()); + WorkData workData = XMLRecordParser.VTDParseWorkData(builder.toString().getBytes()); if (workData != null) { if (workData.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -113,7 +114,7 @@ public class ActivitiesDecompressor { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } @@ -177,11 +178,11 @@ public class ActivitiesDecompressor { counter++; BufferedReader br = new BufferedReader(new InputStreamReader(tais)); String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - String xml = buffer.toString(); + String xml = builder.toString(); String[] filenameParts = filename.split("/"); final Text key = new Text( XMLRecordParser diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java index 4de4a02665..99587b16a4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,7 +11,6 @@ import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; public class ExtractXMLActivitiesData extends OrcidDSManager { private String outputWorksPath; @@ -22,11 +22,11 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { extractXMLActivitiesData.extractWorks(); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLActivitiesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); @@ -43,7 +43,6 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { private void extractWorks() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java index 5c2a35229a..4121f33913 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -5,12 +5,13 @@ import java.io.IOException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; +import eu.dnetlib.dhp.parser.utility.VtdException; public class ExtractXMLSummariesData extends OrcidDSManager { @@ -27,7 +28,7 @@ public class ExtractXMLSummariesData extends OrcidDSManager { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLSummariesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); @@ -42,9 +43,8 @@ public class ExtractXMLSummariesData extends OrcidDSManager { Log.info("Output Authors Data: " + outputAuthorsPath); } - public void extractAuthors() throws Exception { + public void extractAuthors() throws IOException, VtdException, ParseException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 3b40334509..7e4869fddc 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -22,9 +22,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); } - public void generateAuthorsDOIsData() throws Exception { + public void generateAuthorsDOIsData() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 73a4bfd05a..0b4ef279d0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -25,9 +25,8 @@ public class OrcidDSManager { orcidDSManager.generateAuthors(); } - public void generateAuthors() throws Exception { + public void generateAuthors() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8cf070213c..2b8e42bf63 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.FileNotFoundException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -64,7 +65,7 @@ public class SparkDownloadOrcidAuthors { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -82,7 +83,7 @@ public class SparkDownloadOrcidAuthors { JavaPairRDD lamdaFileRDD = sc .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); - logger.info("Data retrieved: " + lamdaFileRDDCount); + logger.info("Data retrieved: {}", lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); @@ -95,7 +96,7 @@ public class SparkDownloadOrcidAuthors { return false; }; - Function, Tuple2> downloadRecordFunction = data -> { + Function, Tuple2> downloadRecordFn = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); final DownloadedRecordData downloaded = new DownloadedRecordData(); @@ -118,14 +119,19 @@ public class SparkDownloadOrcidAuthors { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); } @@ -145,33 +151,41 @@ public class SparkDownloadOrcidAuthors { logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); long authorsModifiedCount = authorsModifiedRDD.count(); - logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Authors modified count: {}", authorsModifiedCount); logger.info("Start downloading ..."); - authorsModifiedRDD - .repartition(100) - .map(downloadRecordFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) - .saveAsNewAPIHadoopFile( - workingPath.concat(outputPath), - Text.class, - Text.class, - SequenceFileOutputFormat.class, - sc.hadoopConfiguration()); - logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); - logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + final JavaPairRDD pairRDD = authorsModifiedRDD + .repartition(100) + .map(downloadRecordFn) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); + + saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); + + logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); + logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } + private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, + JavaPairRDD pairRDD) { + pairRDD + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); + } + public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { Date modifiedDateDt; Date lastUpdateDt; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 59de7ca807..cab5387835 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; -import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; @@ -61,7 +58,7 @@ public class SparkDownloadOrcidWorks { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); final String token = parser.get("token"); final String hdfsServerUri = parser.get("hdfsServerUri"); @@ -169,20 +166,25 @@ public class SparkDownloadOrcidWorks { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); logger .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); + "Downloading {} status code: {}", orcidId, + response.getStatusLine().getStatusCode()); } return downloaded.toTuple2(); } @@ -199,24 +201,24 @@ public class SparkDownloadOrcidWorks { .flatMap(retrieveWorkUrlFunction) .repartition(100) .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); - logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); - logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); - logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); - logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); - logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); - logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); - logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); - logger.info("errorLoadingXMLFoundAcc: " + errorLoadingXMLFoundAcc.value().toString()); - logger.info("errorParsingXMLFoundAcc: " + errorParsingXMLFoundAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); + logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); + logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); + logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); + logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); + logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); + logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); + logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); + logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index 178d076089..d2fed61ec2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -3,7 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.*; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -15,7 +16,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -28,10 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { - private static String hdfsServerUri; - private static String workingPath; - private static String outputPath; - private static String lambdaFileName; public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -45,12 +41,14 @@ public class SparkGenLastModifiedSeq { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - hdfsServerUri = parser.get("hdfsServerUri"); - workingPath = parser.get("workingPath"); - outputPath = parser.get("outputPath"); - lambdaFileName = parser.get("lambdaFileName"); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; + + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = parser.get("workingPath"); + final String outputPath = parser.get("outputPath"); + final String lambdaFileName = parser.get("lambdaFileName"); + + final String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + final String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -64,7 +62,7 @@ public class SparkGenLastModifiedSeq { .concat(workingPath) .concat(outputPath)); Path hdfsreadpath = new Path(lambdaFileUri); - Configuration conf = new Configuration(); + Configuration conf = spark.sparkContext().hadoopConfiguration(); conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 7d9f39d05e..3ecb4f5e3e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,13 +52,13 @@ public class SparkGenerateDoiAuthorList { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); - logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + logger.info("outputDoiAuthorListPath: {}", outputDoiAuthorListPath); final String authorsPath = parser.get("authorsPath"); - logger.info("authorsPath: ", authorsPath); + logger.info("authorsPath: {}", authorsPath); final String xmlWorksPath = parser.get("xmlWorksPath"); - logger.info("xmlWorksPath: ", xmlWorksPath); + logger.info("xmlWorksPath: {}", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -128,8 +127,7 @@ public class SparkGenerateDoiAuthorList { .concat( d1.stream(), d2.stream()); - List mergedAuthors = mergedStream.collect(Collectors.toList()); - return mergedAuthors; + return mergedStream.collect(Collectors.toList()); } if (d1 != null) { return d1; @@ -170,14 +168,6 @@ public class SparkGenerateDoiAuthorList { return authorData; } - private static WorkData loadWorkFromJson(Text orcidId, Text json) { - WorkData workData = new WorkData(); - workData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - workData.setDoi(getJsonValue(jElement, "doi")); - return workData; - } - private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { JsonElement name = null; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java index 51326c6103..1727f18254 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; -import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -17,8 +16,10 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,11 +36,12 @@ import scala.Tuple2; public class SparkUpdateOrcidAuthors { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +55,6 @@ public class SparkUpdateOrcidAuthors { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -87,7 +88,7 @@ public class SparkUpdateOrcidAuthors { String jsonData = data._2().toString(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { @@ -135,7 +136,7 @@ public class SparkUpdateOrcidAuthors { .col("authorData.oid") .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), "full_outer") - .map(value -> { + .map((MapFunction, AuthorSummary>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -183,7 +184,8 @@ public class SparkUpdateOrcidAuthors { .groupBy("authorData.oid") .agg(array_max(collect_list("downloadDate"))) .map( - row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()), + (MapFunction>) row -> new Tuple2<>(row.get(0).toString(), + row.get(1).toString()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .toJavaRDD() .collect(); @@ -214,25 +216,24 @@ public class SparkUpdateOrcidAuthors { .dropDuplicates("downloadDate", "authorData"); cleanedDS .toJavaRDD() - .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); long cleanedDSCount = cleanedDS.count(); - logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); - logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); - logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); - logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); - logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); - logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - logger.info("report_merged_count: " + mergedCount); - logger.info("report_cleaned_count: " + cleanedDSCount); + logger.info("report_oldAuthorsFoundAcc: {}", oldAuthorsFoundAcc.value()); + logger.info("report_newAuthorsFoundAcc: {}", newAuthorsFoundAcc.value()); + logger.info("report_updatedAuthorsFoundAcc: {}", updatedAuthorsFoundAcc.value()); + logger.info("report_errorCodeFoundAcc: {}", errorCodeAuthorsFoundAcc.value()); + logger.info("report_errorLoadingJsonFoundAcc: {}", errorLoadingAuthorsJsonFoundAcc.value()); + logger.info("report_errorParsingXMLFoundAcc: {}", errorParsingAuthorsXMLFoundAcc.value()); + logger.info("report_merged_count: {}", mergedCount); + logger.info("report_cleaned_count: {}", cleanedDSCount); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java index fa17e97e34..aad202ff65 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java @@ -3,17 +3,16 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -26,20 +25,19 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; import scala.Tuple2; public class SparkUpdateOrcidDatasets { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +51,6 @@ public class SparkUpdateOrcidDatasets { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,25 +59,6 @@ public class SparkUpdateOrcidDatasets { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator oldAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("old_authors_found"); - LongAccumulator updatedAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("updated_authors_found"); - LongAccumulator newAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("new_authors_found"); - LongAccumulator errorCodeAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("error_code_authors_found"); - LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_authors_json_found"); - LongAccumulator errorParsingAuthorsXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_parsing_authors_xml_found"); - LongAccumulator oldWorksFoundAcc = spark .sparkContext() .longAccumulator("old_works_found"); @@ -100,125 +78,11 @@ public class SparkUpdateOrcidDatasets { .sparkContext() .longAccumulator("error_parsing_works_xml_found"); -// JavaPairRDD xmlSummariesRDD = sc -// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); -// xmlSummariesRDD -// .map(seq -> { -// AuthorSummary authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(seq._2().toString().getBytes()); -// authorSummary -// .setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return authorSummary; -// }) -// .filter(authorSummary -> authorSummary != null) -// .map(authorSummary -> JsonWriter.create(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); -// -// JavaPairRDD xmlWorksRDD = sc -// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); -// -// xmlWorksRDD -// .map(seq -> { -// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); -// Work work = new Work(); -// work.setWorkDetail(workDetail); -// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return work; -// }) -// .filter(work -> work != null) -// .map(work -> JsonWriter.create(work)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); - -// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { -// AuthorSummary authorSummary = new AuthorSummary(); -// String orcidId = data._1().toString(); -// String jsonData = data._2().toString(); -// JsonElement jElement = new JsonParser().parse(jsonData); -// String statusCode = getJsonValue(jElement, "statusCode"); -// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); -// if (statusCode.equals("200")) { -// String compressedData = getJsonValue(jElement, "compressedData"); -// if (StringUtils.isEmpty(compressedData)) { -// errorLoadingAuthorsJsonFoundAcc.add(1); -// } else { -// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); -// try { -// authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(xmlAuthor.getBytes()); -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// authorSummary.setBase64CompressData(compressedData); -// return authorSummary; -// } catch (Exception e) { -// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); -// errorParsingAuthorsXMLFoundAcc.add(1); -// } -// } -// } else { -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// errorCodeAuthorsFoundAcc.add(1); -// } -// return authorSummary; -// }; -// -// Dataset downloadedAuthorSummaryDS = spark -// .createDataset( -// sc -// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) -// .map(retrieveAuthorSummaryFunction) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// Dataset currentAuthorSummaryDS = spark -// .createDataset( -// sc -// .textFile(workingPath.concat("orcid_dataset/authors/*")) -// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// currentAuthorSummaryDS -// .joinWith( -// downloadedAuthorSummaryDS, -// currentAuthorSummaryDS -// .col("authorData.oid") -// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), -// "full_outer") -// .map(value -> { -// Optional opCurrent = Optional.ofNullable(value._1()); -// Optional opDownloaded = Optional.ofNullable(value._2()); -// if (!opCurrent.isPresent()) { -// newAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// if (!opDownloaded.isPresent()) { -// oldAuthorsFoundAcc.add(1); -// return opCurrent.get(); -// } -// if (opCurrent.isPresent() && opDownloaded.isPresent()) { -// updatedAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// return null; -// }, -// Encoders.bean(AuthorSummary.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); -// -// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); -// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); -// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); -// logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); -// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); -// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - - Function retrieveWorkFunction = jsonData -> { + final Function retrieveWorkFunction = jsonData -> { Work work = new Work(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate("2020-11-18 00:00:05.644768"); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -247,9 +111,7 @@ public class SparkUpdateOrcidDatasets { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -271,7 +133,7 @@ public class SparkUpdateOrcidDatasets { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -291,23 +153,22 @@ public class SparkUpdateOrcidDatasets { Encoders.bean(Work.class)) .filter(Objects::nonNull) .toJavaRDD() - .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java index 5ebbc01eda..64523941d7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -29,14 +30,16 @@ import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; public class SparkUpdateOrcidWorks { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -83,7 +86,6 @@ public class SparkUpdateOrcidWorks { JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate(Long.toString(System.currentTimeMillis())); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -112,9 +114,7 @@ public class SparkUpdateOrcidWorks { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -136,7 +136,7 @@ public class SparkUpdateOrcidWorks { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -159,12 +159,12 @@ public class SparkUpdateOrcidWorks { .map(work -> OBJECT_MAPPER.writeValueAsString(work)) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); String lastModifiedDateFromLambdaFile = HDFSUtil .readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt"); @@ -175,8 +175,7 @@ public class SparkUpdateOrcidWorks { private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index c85b5b6918..af6def227c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -20,6 +20,9 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -28,22 +31,23 @@ public class SummariesDecompressor { private static final int MAX_XML_RECORDS_PARSED = -1; - public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) - throws Exception { - String uri = inputUri; - FileSystem fs = FileSystem.get(URI.create(uri), conf); - Path inputPath = new Path(uri); + private SummariesDecompressor() { + } + + public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws IOException { + FileSystem fs = FileSystem.get(URI.create(inputUri), conf); + Path inputPath = new Path(inputUri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { - System.err.println("No codec found for " + uri); + System.err.println("No codec found for " + inputUri); System.exit(1); } - CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + CompressionCodecFactory.removeSuffix(inputUri, codec.getDefaultExtension()); InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarSummaries(fs, conf, gzipInputStream, outputPath); + parseTarSummaries(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,7 +56,7 @@ public class SummariesDecompressor { } private static void parseTarSummaries( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int nameFound = 0; int surnameFound = 0; @@ -163,7 +167,7 @@ public class SummariesDecompressor { } public static void extractXML(Configuration conf, String inputUri, Path outputPath) - throws Exception { + throws IOException, VtdException, ParseException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index a2342f7b4d..9eb73b240c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; public class JsonHelper { + private JsonHelper() { + } + public static String createOidWork(WorkDetail workData) { return new Gson().toJson(workData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java index e1a913476b..bbd2e1f7e1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java @@ -2,10 +2,8 @@ package eu.dnetlib.doiboost.orcid.util; import java.io.*; -import java.net.URI; import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,42 +12,39 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - -import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors; - public class HDFSUtil { static Logger logger = LoggerFactory.getLogger(HDFSUtil.class); + private HDFSUtil() { + } + private static FileSystem getFileSystem(String hdfsServerUri) throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsServerUri); - FileSystem fileSystem = FileSystem.get(conf); - return fileSystem; + return FileSystem.get(conf); } public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException { FileSystem fileSystem = getFileSystem(hdfsServerUri); Path toReadPath = new Path(workingPath.concat(path)); if (!fileSystem.exists(toReadPath)) { - throw new RuntimeException("File not exist: " + path); + throw new IOException("File not exist: " + path); } - logger.info("Last_update_path " + toReadPath); + logger.info("Last_update_path {}", toReadPath); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); - BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); - StringBuffer sb = new StringBuffer(); - try { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { + StringBuilder sb = new StringBuilder(); + String line; while ((line = br.readLine()) != null) { sb.append(line); } - } finally { - br.close(); + + String buffer = sb.toString(); + logger.info("Last_update: {}", buffer); + return buffer; } - String buffer = sb.toString(); - logger.info("Last_update: " + buffer); - return buffer; } public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text) @@ -60,8 +55,8 @@ public class HDFSUtil { fileSystem.delete(toWritePath, true); } FSDataOutputStream os = fileSystem.create(toWritePath); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); - br.write(text); - br.close(); + try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8))) { + br.write(text); + } } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 52e0761055..e8745aa96c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,6 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.io.IOException; import java.util.*; import org.apache.commons.lang3.StringUtils; @@ -38,6 +37,9 @@ public class XMLRecordParser { private static final String NS_ERROR = "error"; + private XMLRecordParser() { + } + public static AuthorData VTDParseAuthorData(byte[] bytes) throws VtdException, ParseException { final VTDGen vg = new VTDGen(); @@ -90,46 +92,6 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } -// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); -// if (StringUtils.isNoneBlank(creationMethod)) { -// authorData.setCreationMethod(creationMethod); -// } -// -// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); -// if (StringUtils.isNoneBlank(completionDate)) { -// authorData.setCompletionDate(completionDate); -// } -// -// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); -// if (StringUtils.isNoneBlank(submissionDate)) { -// authorData.setSubmissionDate(submissionDate); -// } -// -// final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); -// if (StringUtils.isNoneBlank(claimed)) { -// authorData.setClaimed(Boolean.parseBoolean(claimed)); -// } -// -// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); -// if (StringUtils.isNoneBlank(verifiedEmail)) { -// authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); -// } -// -// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); -// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { -// authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); -// } -// -// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); -// if (StringUtils.isNoneBlank(deactivationDate)) { -// authorData.setDeactivationDate(deactivationDate); -// } -// -// final String lastModifiedDate = VtdUtilityParser -// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); -// if (StringUtils.isNoneBlank(lastModifiedDate)) { -// authorData.setLastModifiedDate(lastModifiedDate); -// } return authorData; } @@ -207,7 +169,7 @@ public class XMLRecordParser { } public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) - throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + throws ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -251,15 +213,15 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); - AuthorData authorData = retrieveAuthorData(ap, vn, bytes); - AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorData authorData = retrieveAuthorData(ap, vn); + AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn); AuthorSummary authorSummary = new AuthorSummary(); authorSummary.setAuthorData(authorData); authorSummary.setAuthorHistory(authorHistory); return authorSummary; } - private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn) throws VtdException { AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -300,7 +262,7 @@ public class XMLRecordParser { return authorData; } - private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn) throws VtdException { AuthorHistory authorHistory = new AuthorHistory(); final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 124a1b9efb..ddbf71bbbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -33,6 +33,9 @@ public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private ActivitiesDumpReader() { + } + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { String uri = inputUri; @@ -48,7 +51,7 @@ public class ActivitiesDumpReader { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -56,8 +59,7 @@ public class ActivitiesDumpReader { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int noDoiFound = 0; int errorFromOrcidFound = 0; @@ -73,7 +75,7 @@ public class ActivitiesDumpReader { SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,12 +85,12 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - buffer = new StringBuffer(); + builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } WorkDetail workDetail = XMLRecordParserNoDoi - .VTDParseWorkData(buffer.toString().getBytes()); + .VTDParseWorkData(builder.toString().getBytes()); if (workDetail != null) { if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -123,7 +125,7 @@ public class ActivitiesDumpReader { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 4a64124d12..5c23a33a8f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; @@ -16,7 +16,6 @@ import eu.dnetlib.doiboost.orcid.OrcidDSManager; * This job generates one sequence file, the key is an orcid identifier and the * value is an orcid publication in json format */ - public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; @@ -30,13 +29,12 @@ public class GenOrcidAuthorWork extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 1d47808ef1..db3b149233 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -14,21 +14,16 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; @@ -75,7 +70,7 @@ public class SparkGenEnrichedOrcidWorks { spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -86,10 +81,10 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) .filter(authorSummary -> authorSummary.getAuthorData() != null) - .map(authorSummary -> authorSummary.getAuthorData()) + .map(AuthorSummary::getAuthorData) .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + authorDataset.count()); + logger.info("Authors data loaded: {}", authorDataset.count()); Dataset workDataset = spark .createDataset( @@ -97,7 +92,7 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) .filter(work -> work.getWorkDetail() != null) - .map(work -> work.getWorkDetail()) + .map(Work::getWorkDetail) .filter(work -> work.getErrorCode() == null) .filter( work -> work @@ -107,7 +102,7 @@ public class SparkGenEnrichedOrcidWorks { .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) .rdd(), Encoders.bean(WorkDetail.class)); - logger.info("Works data loaded: " + workDataset.count()); + logger.info("Works data loaded: {}", workDataset.count()); final LongAccumulator warnNotFoundContributors = spark .sparkContext() @@ -122,7 +117,7 @@ public class SparkGenEnrichedOrcidWorks { WorkDetail w = value._1; AuthorData a = value._2; if (w.getContributors() == null - || (w.getContributors() != null && w.getContributors().size() == 0)) { + || (w.getContributors() != null && w.getContributors().isEmpty())) { Contributor c = new Contributor(); c.setName(a.getName()); c.setSurname(a.getSurname()); @@ -163,7 +158,6 @@ public class SparkGenEnrichedOrcidWorks { final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, - errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, @@ -172,13 +166,10 @@ public class SparkGenEnrichedOrcidWorks { titleNotProvidedAcc, noUrlAcc, dateOfCollection); + JavaRDD oafPublicationRDD = enrichedWorksRDD - .map( - e -> { - return (Publication) publicationToOaf - .generatePublicationActionsFromJson(e._2()); - }) - .filter(p -> p != null); + .map(e -> (Publication) publicationToOaf.generatePublicationActionsFromJson(e._2())) + .filter(Objects::nonNull); sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); @@ -186,7 +177,7 @@ public class SparkGenEnrichedOrcidWorks { .mapToPair( p -> new Tuple2<>(p.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( outputEnrichedWorksPath, Text.class, @@ -194,17 +185,17 @@ public class SparkGenEnrichedOrcidWorks { SequenceFileOutputFormat.class, sc.hadoopConfiguration()); - logger.info("parsedPublications: " + parsedPublications.value().toString()); - logger.info("enrichedPublications: " + enrichedPublications.value().toString()); - logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); - logger.info("errorsGeneric: " + errorsGeneric.value().toString()); - logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); - logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); - logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); - logger.info("otherTypeFound: " + otherTypeFound.value().toString()); - logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); - logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); - logger.info("noUrlAcc: " + noUrlAcc.value().toString()); + logger.info("parsedPublications: {}", parsedPublications.value()); + logger.info("enrichedPublications: {}", enrichedPublications.value()); + logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value()); + logger.info("errorsGeneric: {}", errorsGeneric.value()); + logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value()); + logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value()); + logger.info("errorsInvalidType: {}", errorsInvalidType.value()); + logger.info("otherTypeFound: {}", otherTypeFound.value()); + logger.info("deactivatedAcc: {}", deactivatedAcc.value()); + logger.info("titleNotProvidedAcc: {}", titleNotProvidedAcc.value()); + logger.info("noUrlAcc: {}", noUrlAcc.value()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 23e9dd884c..33f3b3bbbd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -18,6 +18,9 @@ public class JsonWriter { public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); + private JsonWriter() { + } + public static String create(AuthorData authorData) throws JsonProcessingException { return OBJECT_MAPPER.writeValueAsString(authorData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 2157538990..f92040c24e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -26,21 +27,19 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ - public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public final static String orcidPREFIX = "orcid_______"; + public static final String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; + public static final String SEPARATOR = IdentifierFactory.ID_SEPARATOR; public static final String DEACTIVATED_NAME = "Given Names Deactivated"; public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; - private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; @@ -52,7 +51,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf( LongAccumulator parsedPublications, LongAccumulator enrichedPublications, - LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, @@ -63,7 +61,6 @@ public class PublicationToOaf implements Serializable { String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; - this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; @@ -77,7 +74,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf() { this.parsedPublications = null; this.enrichedPublications = null; - this.errorsGeneric = null; this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; @@ -88,19 +84,8 @@ public class PublicationToOaf implements Serializable { this.dateOfCollection = null; } - private static final Map> datasources = new HashMap>() { - - { - put( - ModelConstants.ORCID, - new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); - - } - }; - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname private static final Map> externalIds = new HashMap>() { - { put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); @@ -208,10 +193,8 @@ public class PublicationToOaf implements Serializable { .setTitle( titles .stream() - .map(t -> { - return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); - }) - .filter(s -> s != null) + .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ -226,7 +209,7 @@ public class PublicationToOaf implements Serializable { publication.setId(sourceId); // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + settingRelevantDate(rootElement, publication, "issued", true); // Adding collectedfrom publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); @@ -243,7 +226,7 @@ public class PublicationToOaf implements Serializable { Map publicationType = typologiesMapping.get(type); if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { errorsInvalidType.add(1); - logger.error("publication_type_not_found: " + type); + logger.error("publication_type_not_found: {}", type); return null; } @@ -307,7 +290,7 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { + if (authors != null && !authors.isEmpty()) { if (authors.stream().filter(a -> { return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); @@ -322,8 +305,7 @@ public class PublicationToOaf implements Serializable { } else { if (authors == null) { Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(rootElement); - throw new RuntimeException("not_valid_authors: " + json); + throw new RuntimeException("not_valid_authors: " + gson.toJson(rootElement)); } else { if (errorsNotFoundAuthors != null) { errorsNotFoundAuthors.add(1); @@ -434,7 +416,6 @@ public class PublicationToOaf implements Serializable { private void settingRelevantDate(final JsonObject rootElement, final Publication publication, - final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) { @@ -450,10 +431,8 @@ public class PublicationToOaf implements Serializable { Arrays .asList(pubDate) .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .filter(s -> s != null) + .map(r -> mapStructuredProperty(r, q, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); } } @@ -498,7 +477,7 @@ public class PublicationToOaf implements Serializable { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { - logger.error("unknowntype_" + type); + logger.error("unknowntype_{}", type); if (errorsInvalidType != null) { errorsInvalidType.add(1); } @@ -550,7 +529,7 @@ public class PublicationToOaf implements Serializable { } private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { + if (value == null || StringUtils.isBlank(value)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index fff753ff36..e69b496b73 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -1,26 +1,14 @@ package eu.dnetlib.doiboost.orcidnodoi.similarity; -import java.io.IOException; import java.text.Normalizer; import java.util.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; - -import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -29,18 +17,16 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; * the match is found (if exist) author informations are used to enrich the * matched contribuotr inside contributors list */ - public class AuthorMatcher { - private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - public static final Double threshold = 0.8; + public static final Double THRESHOLD = 0.8; - public static void match(AuthorData author, List contributors) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + private AuthorMatcher() { + } + public static void match(AuthorData author, List contributors) { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; contributors .stream() .filter(c -> !StringUtils.isBlank(c.getCreditName())) @@ -62,8 +48,8 @@ public class AuthorMatcher { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -73,14 +59,14 @@ public class AuthorMatcher { } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() - .filter(c -> c.isSimpleMatch()) + .filter(Contributor::isSimpleMatch) .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -92,7 +78,7 @@ public class AuthorMatcher { } public static boolean simpleMatchOnOtherNames(String name, List otherNames) { - if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + if (otherNames == null || otherNames.isEmpty()) { return false; } return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; @@ -132,8 +118,7 @@ public class AuthorMatcher { } public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - return score; + return similarityJaroWinkler(nameA, surnameA, nameB, surnameB); } private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { @@ -179,7 +164,7 @@ public class AuthorMatcher { public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() - .filter(c -> c.isBestMatch()) + .filter(Contributor::isBestMatch) .forEach(c -> { c.setName(author.getName()); c.setSurname(author.getSurname()); @@ -206,9 +191,4 @@ public class AuthorMatcher { } } - private static String toJson(WorkDetail work) { - GsonBuilder builder = new GsonBuilder(); - Gson gson = builder.create(); - return gson.toJson(work); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 29791bbbd9..b4c12eed39 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -5,9 +5,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; @@ -20,21 +17,10 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser */ - public class XMLRecordParserNoDoi { - private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; private static final String NS_COMMON = "common"; - private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; - private static final String NS_PERSON = "person"; - private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; - private static final String NS_DETAILS = "personal-details"; - private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; - private static final String NS_OTHER = "other-name"; - private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; - private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; private static final String NS_WORK = "work"; @@ -42,6 +28,9 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; + private XMLRecordParserNoDoi() { + } + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, ParseException, XPathParseException, NavException, XPathEvalException { @@ -100,16 +89,16 @@ public class XMLRecordParserNoDoi { workData.setUrls(urls); } - workData.setPublicationDates(getPublicationDates(vg, vn, ap)); - workData.setExtIds(getExternalIds(vg, vn, ap)); - workData.setContributors(getContributors(vg, vn, ap)); + workData.setPublicationDates(getPublicationDates(vn, ap)); + workData.setExtIds(getExternalIds(vn, ap)); + workData.setContributors(getContributors(vn, ap)); return workData; } - private static List getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getPublicationDates(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List publicationDates = new ArrayList(); + List publicationDates = new ArrayList<>(); int yearIndex = 0; ap.selectXPath("//common:publication-date/common:year"); while (ap.evalXPath() != -1) { @@ -142,9 +131,9 @@ public class XMLRecordParserNoDoi { return publicationDates; } - private static List getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getExternalIds(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List extIds = new ArrayList(); + List extIds = new ArrayList<>(); int typeIndex = 0; ap.selectXPath("//common:external-id/common:external-id-type"); while (ap.evalXPath() != -1) { @@ -177,12 +166,12 @@ public class XMLRecordParserNoDoi { if (typeIndex == valueIndex) { return extIds; } - return new ArrayList(); + return new ArrayList<>(); } - private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getContributors(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List contributors = new ArrayList(); + List contributors = new ArrayList<>(); ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 2b241ed5f8..82577e0d8b 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.*; @@ -57,7 +58,7 @@ public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { + void downloadTest() throws Exception { final String orcid = "0000-0001-7291-3210"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml"); @@ -159,18 +160,18 @@ public class OrcidClientTest { } @Test - private void testReadBase64CompressedRecord() throws Exception { + void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertTrue(recordFromSeqFile.equals(downloadedRecord)); + assertEquals(recordFromSeqFile, downloadedRecord); } @Test @Disabled - public void lambdaFileReaderTest() throws Exception { + void lambdaFileReaderTest() throws Exception { String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); @@ -187,7 +188,7 @@ public class OrcidClientTest { while ((line = br.readLine()) != null) { String[] values = line.split(","); List recordInfo = Arrays.asList(values); - assertTrue(recordInfo.size() == 4); + assertEquals(4, recordInfo.size()); String orcid = recordInfo.get(0); String modifiedDate = recordInfo.get(3); rowNum++; @@ -264,7 +265,7 @@ public class OrcidClientTest { } @Test - public void downloadWorkTest() throws Exception { + void downloadWorkTest() throws Exception { String orcid = "0000-0003-0015-1952"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml"); @@ -274,7 +275,7 @@ public class OrcidClientTest { } @Test - public void downloadRecordTest() throws Exception { + void downloadRecordTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); @@ -284,7 +285,7 @@ public class OrcidClientTest { } @Test - public void downloadWorksTest() throws Exception { + void downloadWorksTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); @@ -294,7 +295,7 @@ public class OrcidClientTest { } @Test - public void downloadSingleWorkTest() throws Exception { + void downloadSingleWorkTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); @@ -304,7 +305,7 @@ public class OrcidClientTest { } @Test - public void cleanAuthorListTest() throws Exception { + void cleanAuthorListTest() throws Exception { AuthorData a1 = new AuthorData(); a1.setOid("1"); a1.setName("n1"); @@ -333,7 +334,7 @@ public class OrcidClientTest { @Test @Ignore - public void testUpdatedRecord() throws Exception { + void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -342,7 +343,7 @@ public class OrcidClientTest { @Test @Ignore - private void testUpdatedWork() throws Exception { + void testUpdatedWork() throws Exception { final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 235db52d48..78760fa962 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { - private static final String NS_WORK = "work"; - private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; - private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static Path testPath; @@ -38,12 +35,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getName()); @@ -54,12 +49,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getErrorCode()); @@ -67,14 +60,12 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); - XMLRecordParser p = new XMLRecordParser(); - WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes()); assertNotNull(workData); assertNotNull(workData.getOid()); @@ -83,7 +74,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidOtherNamesXMLParser() throws Exception { + void testOrcidOtherNamesXMLParser() throws Exception { String xml = IOUtils .toString( @@ -91,30 +82,13 @@ public class XMLRecordParserTest { AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); - assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + assertEquals("Andrew C. Porteus", authorData.getOtherNames().get(0)); String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } -// @Test -// private void testWorkIdLastModifiedDateXMLParser() throws Exception { -// String xml = IOUtils -// .toString( -// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); -// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); -// workIdLastModifiedDate.forEach((k, v) -> { -// try { -// OrcidClientTest -// .logToFile( -// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " -// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); -// } catch (IOException e) { -// } -// }); -// } - @Test - public void testAuthorSummaryXMLParser() throws Exception { + void testAuthorSummaryXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); @@ -124,7 +98,7 @@ public class XMLRecordParserTest { } @Test - public void testWorkDataXMLParser() throws Exception { + void testWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 01e26dcb4d..1dbb37fca9 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -11,36 +11,37 @@ import org.slf4j.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import jdk.nashorn.internal.ir.annotations.Ignore; -public class PublicationToOafTest { +class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test @Ignore - private void convertOafPublicationTest() throws Exception { + void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); - logger.info("json publication loaded: " + j.toString()); + logger.info("json publication loaded: {}", j.toString()); PublicationToOaf publicationToOaf = new PublicationToOaf(); Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); - assertEquals(oafPublication.getOriginalId().get(0), "60153327"); - logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals("60153327", oafPublication.getOriginalId().get(0)); + logger.info("oafPublication.getId(): {}", oafPublication.getId()); assertEquals( - oafPublication.getTitle().get(0).getValue(), - "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp", + oafPublication.getTitle().get(0).getValue()); assertNotNull(oafPublication.getLastupdatetimestamp()); assertNotNull(oafPublication.getDateofcollection()); assertNotNull(oafPublication.getDateoftransformation()); - assertTrue(oafPublication.getAuthor().size() == 7); + assertEquals(7, oafPublication.getAuthor().size()); oafPublication.getAuthor().forEach(a -> { assertNotNull(a.getFullname()); assertNotNull(a.getRank()); @@ -64,15 +65,15 @@ public class PublicationToOafTest { if (oafPublication.getExternalReference() != null) { oafPublication.getExternalReference().forEach(e -> { assertNotNull(e.getRefidentifier()); - assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + assertEquals(ModelConstants.DNET_PID_TYPES, e.getQualifier().getSchemeid()); }); } assertNotNull(oafPublication.getInstance()); oafPublication.getInstance().forEach(i -> { assertNotNull(i.getInstancetype().getClassid()); - logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid()); assertNotNull(i.getInstancetype().getClassname()); - logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 54c2d6217a..99ec656d58 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.util.*; @@ -25,7 +24,7 @@ import eu.dnetlib.dhp.schema.orcid.Contributor; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -public class OrcidNoDoiTest { +class OrcidNoDoiTest { private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); @@ -34,7 +33,7 @@ public class OrcidNoDoiTest { static String orcidIdA = "0000-0003-2760-1191"; @Test - public void readPublicationFieldsTest() + void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils @@ -44,10 +43,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -87,7 +82,7 @@ public class OrcidNoDoiTest { } @Test - public void authorDoubleMatchTest() throws Exception { + void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -98,71 +93,49 @@ public class OrcidNoDoiTest { .toString( OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + assertEquals("Abdel-Dayem K", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 6); + assertEquals(6, workData.getContributors().size()); } @Test - public void readContributorsTest() + void readContributorsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); - assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); - assertTrue(workData.getContributors().get(0).getRole().equals("role0")); - assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertEquals("seq0", workData.getContributors().get(0).getSequence()); + assertEquals("role0", workData.getContributors().get(0).getRole()); + assertEquals("creditname1", workData.getContributors().get(1).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); - assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); - assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertEquals("creditname2", workData.getContributors().get(2).getCreditName()); + assertEquals("seq2", workData.getContributors().get(2).getSequence()); assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); - assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertEquals("creditname3", workData.getContributors().get(3).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); - assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertEquals("role3", workData.getContributors().get(3).getRole()); assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); - assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); - assertTrue(workData.getContributors().get(4).getRole().equals("role4")); + assertEquals("seq4", workData.getContributors().get(4).getSequence()); + assertEquals("role4", workData.getContributors().get(4).getRole()); } @Test - public void authorSimpleMatchTest() throws Exception { + void authorSimpleMatchTest() throws Exception { String orcidWork = "activity_work_0000-0002-5982-8983.xml"; AuthorData author = new AuthorData(); author.setName("Parkhouse"); @@ -175,10 +148,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -188,20 +157,21 @@ public class OrcidNoDoiTest { assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Parkhouse, H.")); + assertEquals("Parkhouse, H.", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 2); + assertEquals(2, workData.getContributors().size()); Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0002-5982-8983")); - assertTrue(c.getName().equals("Parkhouse")); - assertTrue(c.getSurname().equals("H.")); - assertTrue(c.getCreditName().equals("Parkhouse, H.")); + + assertEquals("0000-0002-5982-8983", c.getOid()); + assertEquals("Parkhouse", c.getName()); + assertEquals("H.", c.getSurname()); + assertEquals("Parkhouse, H.", c.getCreditName()); } @Test - public void match() { + void match() { AuthorData author = new AuthorData(); author.setName("Joe"); @@ -210,7 +180,6 @@ public class OrcidNoDoiTest { Contributor contributor = new Contributor(); contributor.setCreditName("Joe Dodge"); List contributors = Arrays.asList(contributor); - AuthorMatcher am = new AuthorMatcher(); int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); contributors @@ -225,12 +194,13 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters.get(0) == 1); + assertEquals(1, matchCounters.get(0)); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("Joe Dodge", contributors.get(0).getCreditName()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); AuthorData authorX = new AuthorData(); authorX.setName(nameA); @@ -259,7 +229,7 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters2.get(0) == 2); + assertEquals(2, matchCounters2.get(0)); assertTrue(contributorList.get(0).isSimpleMatch()); assertTrue(contributorList.get(1).isSimpleMatch()); @@ -271,7 +241,7 @@ public class OrcidNoDoiTest { c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .filter(c -> c.getScore() >= AuthorMatcher.THRESHOLD) .max(Comparator.comparing(c -> c.getScore())); assertTrue(optCon.isPresent()); @@ -281,15 +251,16 @@ public class OrcidNoDoiTest { assertTrue(contributorList.get(0).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch()); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX); - assertTrue(contributorList.get(0).getName().equals(nameA)); - assertTrue(contributorList.get(0).getSurname().equals(surnameA)); - assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); - assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + + assertEquals(nameA, contributorList.get(0).getName()); + assertEquals(surnameA, contributorList.get(0).getSurname()); + assertEquals("Abdel-Dayem Khai", contributorList.get(0).getCreditName()); + assertEquals(orcidIdA, contributorList.get(0).getOid()); assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); } @Test - public void authorBestMatchTest() throws Exception { + void authorBestMatchTest() throws Exception { String name = "Khairy"; String surname = "Abdel Dayem"; String orcidWork = "activity_work_0000-0003-2760-1191.xml"; @@ -304,10 +275,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -315,16 +282,17 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); List c = workData.getContributors(); - assertTrue(c.get(0).getName().equals(name)); - assertTrue(c.get(0).getSurname().equals(surname)); - assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); - assertTrue(c.get(0).getOid().equals(orcidIdA)); + + assertEquals(name, c.get(0).getName()); + assertEquals(surname, c.get(0).getSurname()); + assertEquals("Khair Abde Daye", c.get(0).getCreditName()); + assertEquals(orcidIdA, c.get(0).getOid()); } @Test - public void otherNamesMatchTest() + void otherNamesMatchTest() throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { AuthorData author = new AuthorData(); @@ -341,8 +309,9 @@ public class OrcidNoDoiTest { contributor.setCreditName("XY"); List contributors = Arrays.asList(contributor); AuthorMatcher.match(author, contributors); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 692605b035..0b4a80b2de 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -14,12 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PropagationConstant { + + private PropagationConstant() { + } + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index 0f45d3beb7..8e76b57789 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,16 +5,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.gson.Gson; /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { - private static final Log log = LogFactory.getLog(Community.class); - private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 844fe29621..5d92f5ab6e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -2,15 +2,9 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; @@ -22,8 +16,6 @@ import eu.dnetlib.dhp.bulktag.criteria.Selection; /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { - private static final Log log = LogFactory.getLog(CommunityConfiguration.class); - private Map communities; // map subject -> communityid @@ -136,7 +128,7 @@ public class CommunityConfiguration implements Serializable { else return null; }) - .filter(st -> (st != null)) + .filter(Objects::nonNull) .collect(Collectors.toList()); } @@ -161,7 +153,7 @@ public class CommunityConfiguration implements Serializable { private List getContextIds(List> list) { if (list != null) { - return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + return list.stream().map(Pair::getFst).collect(Collectors.toList()); } return Lists.newArrayList(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 749ed292f9..822d350783 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -13,6 +13,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,11 +32,16 @@ public class CommunityConfigurationFactory { private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration newInstance(final String xml) throws DocumentException { + private CommunityConfigurationFactory() { + } + + public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { log.debug(String.format("parsing community configuration from:\n%s", xml)); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Map communities = Maps.newHashMap(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index e0856ae8f6..9002e718f5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -13,9 +13,6 @@ public class Constraint implements Serializable { private String value; private Selection selection; - public Constraint() { - } - public String getVerb() { return verb; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index b56dfaaa31..0f6fab238e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -19,11 +19,8 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 02/08/2018. */ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); - // private ConstraintEncapsulator ce; - private List constraint; - public Constraints() { - } + private List constraint; public List getConstraint() { return constraint; @@ -44,13 +41,8 @@ public class Constraints implements Serializable { try { st.setSelection(resolver); - } catch (NoSuchMethodException e) { - log.error(e.getMessage()); - } catch (IllegalAccessException e) { - log.error(e.getMessage()); - } catch (InvocationTargetException e) { - log.error(e.getMessage()); - } catch (InstantiationException e) { + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException + | InstantiationException e) { log.error(e.getMessage()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index a9427b5941..cb198dc438 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -19,10 +19,6 @@ public class Provider implements Serializable { private SelectionConstraints selectionConstraints; - public SelectionConstraints getSelCriteria() { - return selectionConstraints; - } - public SelectionConstraints getSelectionConstraints() { return selectionConstraints; } @@ -31,10 +27,6 @@ public class Provider implements Serializable { this.selectionConstraints = selectionConstraints; } - public void setSelCriteria(SelectionConstraints selCriteria) { - this.selectionConstraints = selCriteria; - } - public String getOpenaireId() { return openaireId; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 993a7ef775..89cf5beffe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.util.List; import org.dom4j.DocumentException; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -63,7 +64,7 @@ public class QueryInformationSystem { + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); final List res = isLookUp.quickSearchProfile(XQUERY); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c9ff269631..c8b1bc8fe2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -43,7 +43,6 @@ public class ResultTagger implements Serializable { param.put(key, jsonContext.read(params.get(key))); } catch (com.jayway.jsonpath.PathNotFoundException e) { param.put(key, new ArrayList<>()); - // throw e; } } return param; @@ -52,9 +51,6 @@ public class ResultTagger implements Serializable { public R enrichContextCriteria( final R result, final CommunityConfiguration conf, final Map criteria) { - // } - // public Result enrichContextCriteria(final Result result, final CommunityConfiguration - // conf, final Map criteria) { final Map> param = getParamMap(result, criteria); // Verify if the entity is deletedbyinference. In case verify if to clean the context list @@ -74,7 +70,7 @@ public class ResultTagger implements Serializable { result .getSubject() .stream() - .map(subject -> subject.getValue()) + .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .map(String::toLowerCase) .map(String::trim) @@ -90,15 +86,11 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { - if (Objects.nonNull(i.getCollectedfrom())) { - if (Objects.nonNull(i.getCollectedfrom().getKey())) { - tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); - } + if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { + tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); } - if (Objects.nonNull(i.getHostedby())) { - if (Objects.nonNull(i.getHostedby().getKey())) { - tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); - } + if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { + tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); } } @@ -149,52 +141,46 @@ public class ResultTagger implements Serializable { return result; } - result - .getContext() - .stream() - .map( - c -> { - if (communities.contains(c.getId())) { - Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); - List dataInfoList; - if (opt_dataInfoList.isPresent()) - dataInfoList = opt_dataInfoList.get(); - else { - dataInfoList = new ArrayList<>(); - c.setDataInfo(dataInfoList); - } - if (subjects.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_SUBJECT, - CLASS_NAME_BULKTAG_SUBJECT, - TAGGING_TRUST)); - if (datasources.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_DATASOURCE, - CLASS_NAME_BULKTAG_DATASOURCE, - TAGGING_TRUST)); - if (czenodo.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_CZENODO, - CLASS_NAME_BULKTAG_ZENODO, - TAGGING_TRUST)); - } - return c; - }) - .collect(Collectors.toList()); + result.getContext().forEach(c -> { + if (communities.contains(c.getId())) { + Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); + List dataInfoList; + if (opt_dataInfoList.isPresent()) + dataInfoList = opt_dataInfoList.get(); + else { + dataInfoList = new ArrayList<>(); + c.setDataInfo(dataInfoList); + } + if (subjects.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_SUBJECT, + CLASS_NAME_BULKTAG_SUBJECT, + TAGGING_TRUST)); + if (datasources.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_DATASOURCE, + CLASS_NAME_BULKTAG_DATASOURCE, + TAGGING_TRUST)); + if (czenodo.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_CZENODO, + CLASS_NAME_BULKTAG_ZENODO, + TAGGING_TRUST)); + } + }); communities .removeAll( - result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet())); + result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) return result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 71ff61d1b3..c7dcce8126 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -15,9 +15,6 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; public class SelectionConstraints implements Serializable { private List criteria; - public SelectionConstraints() { - } - public List getCriteria() { return criteria; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 80d98bb1a6..8274e26c9a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.community; public class TaggingConstants { + private TaggingConstants() { + } + public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; public static final String CLASS_ID_SUBJECT = "community:subject"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index bc6b75fba2..54c2dc9aa8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -31,7 +31,6 @@ public class ZenodoCommunity implements Serializable { } private void setSelCriteria(String json) { - // Type collectionType = new TypeToken>(){}.getType(); selCriteria = new Gson().fromJson(json, SelectionConstraints.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index ec9fb716d4..9129e6e541 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.bulktag.criteria; -public interface Selection { +import java.io.Serializable; + +public interface Selection extends Serializable { boolean apply(String value); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 54176efb67..459ac1ba9c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -7,16 +7,16 @@ import java.util.Map; import java.util.stream.Collectors; import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; public class VerbResolver implements Serializable { - private Map> map = null; // = new HashMap<>(); - private final ClassGraph classgraph = new ClassGraph(); + + private Map> map = null; public VerbResolver() { + final ClassGraph classgraph = new ClassGraph(); try (ScanResult scanResult = // Assign scanResult in try-with-resources classgraph // Create a new ClassGraph instance .verbose() // If you want to enable logging to stderr @@ -41,8 +41,6 @@ public class VerbResolver implements Serializable { .get(0) .getValue(), value -> (Class) value.loadClass())); - } catch (Exception e) { - e.printStackTrace(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 0bb801999a..446ad5fbc5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { + private VerbResolverFactory() { + } + public static VerbResolver newInstance() { return new VerbResolver(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 04a659a1ca..ddc7f93f71 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -6,11 +6,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,11 +17,11 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -77,16 +76,16 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = " d.id = '" + whitelist.get(0) + "'"; - for (int i = 1; i < whitelist.size(); i++) { - whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; - } - String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + final String whitelisted = whitelist + .stream() + .map(id -> " d.id = '" + id + "'") + .collect(Collectors.joining(" OR ")); - for (int i = 1; i < allowedtypes.size(); i++) { - allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; - } + final String allowed = allowedtypes + .stream() + .map(type -> " d.datasourcetype.classid = '" + type + "'") + .collect(Collectors.joining(" OR ")); Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8d0d6c48b9..77f7288f67 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -85,13 +85,12 @@ public class PrepareResultCountrySet { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); - // log.info("number of results: {}", result.count()); + createCfHbforResult(spark); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); datasource_country.createOrReplaceTempView("datasource_country"); - // log.info("datasource_country number : {}", datasource_country.count()); spark .sql(RESULT_COUNTRYSET_QUERY) @@ -102,7 +101,7 @@ public class PrepareResultCountrySet { ArrayList countryList = a.getCountrySet(); Set countryCodes = countryList .stream() - .map(country -> country.getClassid()) + .map(CountrySbs::getClassid) .collect(Collectors.toSet()); b .getCountrySet() @@ -119,10 +118,6 @@ public class PrepareResultCountrySet { }) .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .option("compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 97e0a33e16..4aa48583f4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +19,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -28,8 +29,6 @@ public class SparkCountryPropagationJob { private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -90,7 +89,6 @@ public class SparkCountryPropagationJob { boolean saveGraph) { if (saveGraph) { - // updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath); log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); @@ -122,7 +120,7 @@ public class SparkCountryPropagationJob { private static List merge(List c1, List c2) { HashSet countries = c1 .stream() - .map(c -> c.getClassid()) + .map(Qualifier::getClassid) .collect(Collectors.toCollection(HashSet::new)); return c2 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index b3ef3a1121..95b870292d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -8,9 +8,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,7 +16,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 2cea32e58f..c60012a748 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -87,7 +87,7 @@ public class PrepareResultOrcidAssociationStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 436a53cbe0..40faef7f33 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -18,7 +18,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -141,34 +140,31 @@ public class SparkOrcidToResultFromSemRelJob { author_surname = author.getSurname(); } if (StringUtils.isNotEmpty(author_surname)) { + // have the same surname. Check the name if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); + .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author_name.trim())) { + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 27ff727fdf..ac61e26f94 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -2,30 +2,25 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.getConstraintList; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index c57abb451d..1ec521af18 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -5,28 +5,27 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; public class SparkResultToProjectThroughSemRelJob { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(SparkResultToProjectThroughSemRelJob.class); public static void main(String[] args) throws Exception { @@ -95,26 +94,21 @@ public class SparkResultToProjectThroughSemRelJob { private static FlatMapFunction, Relation> mapRelationRn() { return value -> { - List new_relations = new ArrayList<>(); - ResultProjectSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - if (already_linked.isPresent()) { - already_linked - .get() - .getProjectSet() - .stream() - .forEach( - (p -> { - potential_update.getProjectSet().remove(p); - })); - } - String resId = potential_update.getResultId(); - potential_update + List newRelations = new ArrayList<>(); + ResultProjectSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + alreadyLinked + .ifPresent( + resultProjectSet -> resultProjectSet + .getProjectSet() + .forEach( + (p -> potentialUpdate.getProjectSet().remove(p)))); + String resId = potentialUpdate.getResultId(); + potentialUpdate .getProjectSet() - .stream() .forEach( projectId -> { - new_relations + newRelations .add( getRelation( resId, @@ -125,7 +119,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - new_relations + newRelations .add( getRelation( projectId, @@ -137,7 +131,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index a5f84cd2fb..1a008797da 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -10,11 +10,12 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -109,10 +110,6 @@ public class PrepareResultCommunitySet { }) .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .json(outputPath); } private static MapFunction mapResultCommunityFn( @@ -131,7 +128,7 @@ public class PrepareResultCommunitySet { communitySet.addAll(organizationMap.get(oId)); } } - if (communitySet.size() > 0) { + if (!communitySet.isEmpty()) { ResultCommunityList rcl = new ResultCommunityList(); rcl.setResultId(rId); ArrayList communityList = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 7201a30f64..cb80a90ca9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -4,7 +4,10 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +20,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -59,6 +61,7 @@ public class SparkResultToCommunityFromOrganizationJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -106,9 +109,12 @@ public class SparkResultToCommunityFromOrganizationJob { List contextList = ret .getContext() .stream() - .map(con -> con.getId()) + .map(Context::getId) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R res = (R) ret.getClass().newInstance(); + res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 09340369d3..0ddb19a1ac 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -11,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +90,7 @@ public class PrepareResultCommunitySetStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4cb241ef21..3690351fb8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -62,6 +62,7 @@ public class SparkResultToCommunityThroughSemRelJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); runWithSparkHiveSession( @@ -105,15 +106,15 @@ public class SparkResultToCommunityThroughSemRelJob { R ret = value._1(); Optional rcl = Optional.ofNullable(value._2()); if (rcl.isPresent()) { - Set context_set = new HashSet<>(); - ret.getContext().stream().forEach(c -> context_set.add(c.getId())); + Set contexts = new HashSet<>(); + ret.getContext().forEach(c -> contexts.add(c.getId())); List contextList = rcl .get() .getCommunityList() .stream() .map( c -> { - if (!context_set.contains(c)) { + if (!contexts.contains(c)) { Context newContext = new Context(); newContext.setId(c); newContext @@ -130,7 +131,10 @@ public class SparkResultToCommunityThroughSemRelJob { }) .filter(Objects::nonNull) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R r = (R) ret.getClass().newInstance(); + r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 3cf36e5728..0ef5ca1810 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; @@ -91,13 +92,11 @@ public class PrepareResultInstRepoAssociation { private static void prepareDatasourceOrganization( SparkSession spark, String datasourceOrganizationPath, List blacklist) { - String blacklisted = ""; - if (blacklist.size() > 0) { - blacklisted = " AND id != '" + blacklist.get(0) + "'"; - for (int i = 1; i < blacklist.size(); i++) { - blacklisted += " AND id != '" + blacklist.get(i) + "'"; - } - } + + final String blacklisted = blacklist + .stream() + .map(s -> " AND id != '" + s + "'") + .collect(Collectors.joining()); String query = "SELECT source datasourceId, target organizationId " + "FROM ( SELECT id " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 01d7b85e4c..63824f1a81 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -4,23 +4,24 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToOrganizationFromIstRepoJob { @@ -84,7 +85,6 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -105,9 +105,9 @@ public class SparkResultToOrganizationFromIstRepoJob { String outputPath, Class clazz) { - Dataset ds_org = readPath(spark, datasourceorganization, DatasourceOrganization.class); + Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, ds_org); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); @@ -125,26 +125,20 @@ public class SparkResultToOrganizationFromIstRepoJob { private static FlatMapFunction, Relation> createRelationFn() { return value -> { - List new_relations = new ArrayList<>(); - ResultOrganizationSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - List organization_list = potential_update.getOrganizationSet(); - if (already_linked.isPresent()) { - already_linked - .get() - .getOrganizationSet() - .stream() - .forEach( - rId -> { - organization_list.remove(rId); - }); - } - String resultId = potential_update.getResultId(); - organization_list - .stream() + List newRelations = new ArrayList<>(); + ResultOrganizationSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getOrganizationSet(); + alreadyLinked + .ifPresent( + resOrg -> resOrg + .getOrganizationSet() + .forEach(organizations::remove)); + String resultId = potentialUpdate.getResultId(); + organizations .forEach( orgId -> { - new_relations + newRelations .add( getRelation( orgId, @@ -155,7 +149,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - new_relations + newRelations .add( getRelation( resultId, @@ -167,7 +161,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } @@ -175,13 +169,13 @@ public class SparkResultToOrganizationFromIstRepoJob { SparkSession spark, String inputPath, Class resultClazz, - Dataset ds_org) { + Dataset dsOrg) { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); createCfHbforResult(spark); - ds_org.createOrReplaceTempView("rels"); + dsOrg.createOrReplaceTempView("rels"); return spark .sql(RESULT_ORGANIZATIONSET_QUERY) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 72e0a63fa2..07299f01a2 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -90,7 +90,7 @@ public class BulkTagJobTest { } @Test - public void noUpdatesTest() throws Exception { + void noUpdatesTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -128,7 +128,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectNoPreviousContextTest() throws Exception { + void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getPath(); @@ -224,7 +224,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") @@ -306,7 +306,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByDatasourceTest() throws Exception { + void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getPath(); @@ -378,7 +378,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByZenodoCommunityTest() throws Exception { + void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") @@ -500,7 +500,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceTest() throws Exception { + void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getPath(); @@ -628,7 +628,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { SparkBulkTagJob .main( @@ -724,7 +724,7 @@ public class BulkTagJobTest { } @Test - public void bulktagDatasourcewithConstraintsTest() throws Exception { + void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() .getResource( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index ca737b79f7..861546adb8 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.google.gson.Gson; @@ -20,12 +21,12 @@ import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ -public class CommunityConfigurationFactoryTest { +class CommunityConfigurationFactoryTest { private final VerbResolver resolver = new VerbResolver(); @Test - public void parseTest() throws DocumentException, IOException { + void parseTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() @@ -39,7 +40,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void applyVerb() + void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { Constraint sc = new Constraint(); @@ -52,7 +53,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void loadSelCriteriaTest() throws DocumentException, IOException { + void loadSelCriteriaTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 88ad43b6be..963ee55291 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -67,7 +67,7 @@ public class CountryPropagationJobTest { } @Test - public void testCountryPropagationSoftware() throws Exception { + void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") .getPath(); @@ -105,7 +105,7 @@ public class CountryPropagationJobTest { Dataset countryExploded = verificationDs .flatMap( (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); + .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -119,10 +119,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassid = verificationDs .flatMap((FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -180,10 +179,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -241,10 +239,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 238375197f..85db7ecf93 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -65,7 +65,7 @@ public class OrcidPropagationJobTest { } @Test - public void noUpdateTest() throws Exception { + void noUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") .getPath(); @@ -111,7 +111,7 @@ public class OrcidPropagationJobTest { } @Test - public void oneUpdateTest() throws Exception { + void oneUpdateTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { @@ -178,7 +178,7 @@ public class OrcidPropagationJobTest { } @Test - public void twoUpdatesTest() throws Exception { + void twoUpdatesTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index abed028e10..2fe1bc574c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -69,7 +69,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() .getResource( @@ -106,7 +106,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateTenTest() throws Exception { + void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -178,7 +178,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index d739516fc1..4dd8b976c2 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityFromOrganizationJob() throws Exception { + void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") .getPath(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 7709e00a85..0d5b12c808 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityThroughSemRelJob() throws Exception { + void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index cfcccc5f0d..fdcb10fb95 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -67,7 +67,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -110,7 +110,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void UpdateNoMixTest() throws Exception { + void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -176,7 +176,7 @@ public class ResultToOrganizationJobTest { } @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index f06059dd58..95aa749b24 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; -public class CleaningRuleMap extends HashMap> implements Serializable { +public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { /** * Creates the mapping for the Oaf types subject to cleaning diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 9ba153ba5e..5502fd391e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -16,7 +16,7 @@ public class OafCleaner implements Serializable { try { navigate(oaf, mapping); } catch (IllegalAccessException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } return oaf; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 00ddcb5a86..0d2df11fe6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -1,22 +1,21 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; +import java.io.IOException; +import java.io.Serializable; import java.util.Optional; -import org.apache.commons.compress.archivers.ar.ArArchiveEntry; -import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.MakeTarArchive; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class MakeTar implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accbae..dc740e8112 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -8,6 +8,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -29,7 +30,7 @@ public class QueryInformationSystem { ""; public CommunityMap getCommunityMap() - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { return getMap(isLookUp.quickSearchProfile(XQUERY)); } @@ -42,12 +43,14 @@ public class QueryInformationSystem { this.isLookUp = isLookUpService; } - private CommunityMap getMap(List communityMap) throws DocumentException { + private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { final CommunityMap map = new CommunityMap(); for (String xml : communityMap) { final Document doc; - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c0..500fe5986e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -93,7 +93,7 @@ public class ResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -109,20 +109,20 @@ public class ResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -132,7 +132,8 @@ public class ResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -219,11 +220,12 @@ public class ResultMapper implements Serializable { if (oInst.isPresent()) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) - .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( - oInst.get().stream().map(i -> getCommunityInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); } } @@ -422,7 +424,7 @@ public class ResultMapper implements Serializable { Optional .ofNullable(i.getInstancetype()) .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518c..f86f6918f9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -31,25 +32,23 @@ public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private final QueryInformationSystem queryInformationSystem; - private final Configuration conf; private final BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { - conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath); + fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } public static void main(String[] args) throws Exception { @@ -74,10 +73,9 @@ public class SaveCommunityMap implements Serializable { final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); scm.saveCommunityMap(); - } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { + private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException, SAXException { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); writer.close(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544c..ba26b708a7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -17,9 +17,9 @@ import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class SendToZenodoHDFS implements Serializable { - private final static String NEW = "new"; // to be used for a brand new deposition in zenodo - private final static String VERSION = "version"; // to be used to upload a new version of a published deposition - private final static String UPDATE = "update"; // to upload content to an open deposition not published + private static final String NEW = "new"; // to be used for a brand new deposition in zenodo + private static final String VERSION = "version"; // to be used to upload a new version of a published deposition + private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); @@ -85,7 +85,6 @@ public class SendToZenodoHDFS implements Serializable { Path p = fileStatus.getPath(); String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { - // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); log.info("Sending information for community: " + name); if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { @@ -102,9 +101,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) + if (publish) { zenodoApiClient.publish(); - + } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 984e8b128c..8e75e9d921 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -25,6 +25,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Utils() { + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } @@ -57,7 +60,7 @@ public class Utils { public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath)))); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); try { String line; while ((line = br.readLine()) != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e954..a9c0770a80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -57,27 +57,22 @@ public class CommunitySplit implements Serializable { Dataset community_products = result .filter((FilterFunction) r -> containsCommunity(r, c)); - try { - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - } catch (Exception e) { - - } - + community_products.first(); + community_products + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + c); } private static boolean containsCommunity(CommunityResult r, String c) { if (Optional.ofNullable(r.getContext()).isPresent()) { - return r + return !r .getContext() .stream() .filter(con -> con.getCode().equals(c)) .collect(Collectors.toList()) - .size() > 0; + .isEmpty(); } return false; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index 63970d14bc..7ab8a75409 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -2,9 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; -import java.util.*; - -import javax.swing.text.html.Option; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b46..0f2b4c2ed1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -22,6 +22,7 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -127,11 +129,11 @@ public class SparkPrepareResultProject implements Serializable { op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) - .map(a -> a.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getTitle()) - .map(v -> v.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getFundingtree()) @@ -140,7 +142,7 @@ public class SparkPrepareResultProject implements Serializable { .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); - if (tmp.size() > 0) { + if (!tmp.isEmpty()) { return tmp.get(0); } else { return null; @@ -161,30 +163,23 @@ public class SparkPrepareResultProject implements Serializable { } private static Funder getFunder(String fundingtree) { - // ["nsf_________::NSFNSFNational Science - // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision - // of Computing and Communication FoundationsDivision of Computing and Communication - // Foundationsnsf_________::NSF::CISE/OADDirectorate for - // Computer & Information Science & EngineeringDirectorate for Computer & - // Information Science & - // Engineeringnsf:fundingStream"] - Funder f = new Funder(); + final Funder f = new Funder(); final Document doc; try { - doc = new SAXReader().read(new StringReader(fundingtree)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(fundingtree)); f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { List node = ((Node) o).selectNodes("./name"); f.setFundingStream(((Node) node.get(0)).getText()); - } return f; - } catch (DocumentException e) { - e.printStackTrace(); + } catch (DocumentException | SAXException e) { + throw new IllegalArgumentException(e); } - return f; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 2b80b1d861..39ae320536 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -60,7 +60,7 @@ public class SparkUpdateProjectInfo implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } @@ -77,9 +77,7 @@ public class SparkUpdateProjectInfo implements Serializable { "left") .map((MapFunction, CommunityResult>) value -> { CommunityResult r = value._1(); - Optional.ofNullable(value._2()).ifPresent(rp -> { - r.setProjects(rp.getProjectsList()); - }); + Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; }, Encoders.bean(CommunityResult.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java index eb546624e5..57708a78d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java @@ -23,5 +23,4 @@ public class Constants implements Serializable { public static final String CONTEXT_NS_PREFIX = "context_____"; public static final String UNKNOWN = "UNKNOWN"; - // public static final String FUNDER_DS = "entityregistry::projects"; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index ccb84c7138..120de9327b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import java.util.function.Function; -import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,15 +16,13 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and @@ -33,8 +33,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; public class CreateContextEntities implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; + private final transient Configuration conf; + private final transient BufferedWriter writer; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -88,7 +88,7 @@ public class CreateContextEntities implements Serializable { } public void execute(final Function producer, String isLookUpUrl) - throws Exception { + throws ISLookUpException { QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); @@ -101,10 +101,9 @@ public class CreateContextEntities implements Serializable { protected void writeEntity(final R r) { try { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - // log.info("writing context : {}", new Gson().toJson(r)); writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315b..10f2014d00 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -7,6 +7,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; @@ -31,20 +32,21 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; * and the project is not created because of a low coverage in the profiles of openaire ids related to projects */ public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; - private final QueryInformationSystem queryInformationSystem; + private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); + private final transient Configuration conf; + private final transient BufferedWriter writer; + private final transient QueryInformationSystem queryInformationSystem; private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + Objects + .requireNonNull( + CreateContextRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,10 +72,6 @@ public class CreateContextRelation implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); @@ -107,7 +105,7 @@ public class CreateContextRelation implements Serializable { public void execute(final Function> producer, String category, String prefix) { - final Consumer consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c)); + final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); queryInformationSystem.getContextRelation(consumer, category, prefix); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66f..6b9f132778 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -6,8 +6,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -21,8 +19,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.*; * context entity and datasource/projects related to the context. */ public class Process implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Process.class); + @SuppressWarnings("unchecked") public static R getEntity(ContextInfo ci) { try { ResearchInitiative ri; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a54..0ed5de67cc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -11,6 +11,7 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -84,8 +85,9 @@ public class QueryInformationSystem { final Document doc; try { - - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); @@ -102,7 +104,7 @@ public class QueryInformationSystem { } consumer.accept(cinfo); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { e.printStackTrace(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 868fa89fe1..4365e861fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -61,7 +61,7 @@ public class SparkOrganizationRelation implements Serializable { log.info("organization map : {}", new Gson().toJson(organizationMap)); final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {} ", communityMapPath); + log.info("communityMapPath: {}", communityMapPath); SparkConf conf = new SparkConf(); @@ -117,15 +117,12 @@ public class SparkOrganizationRelation implements Serializable { } })); - // if (relList.size() > 0) { spark .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - // } - } @NotNull diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14b..d8a1b5c212 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -14,16 +16,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.zenodo.Community; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f43..2d2b04b5cc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -62,6 +61,7 @@ public class SparkResultLinkedToProject implements Serializable { final String relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); + @SuppressWarnings("unchecked") Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -95,9 +95,9 @@ public class SparkResultLinkedToProject implements Serializable { ._2() .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + .mapGroups( + (MapGroupsFunction, R>) (k, it) -> it.next()._2(), + Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java index 59bdb3914a..f87c0eb7a1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java @@ -74,24 +74,4 @@ public class DatasourceCompatibilityComparator implements Comparator return lClass.compareTo(rClass); } - /* - * CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY - * ['openaire-cris_1.1']) THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT - * COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - * 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - * 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - * 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - * 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - * 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - * 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) - * THEN 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END - */ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index 602213e584..ef419a042e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import java.util.stream.Collectors; -import javax.xml.crypto.Data; - import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -16,7 +14,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -55,9 +51,11 @@ public class MergeGraphTableSparkJob { String jsonConfiguration = IOUtils .toString( - CleanGraphSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json")); + Objects + .requireNonNull( + MergeGraphTableSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -133,7 +131,7 @@ public class MergeGraphTableSparkJob { HashSet collectedFromNames = Optional .ofNullable(o.getCollectedfrom()) .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet()); + .orElse(new HashSet<>()); return !collectedFromNames.contains("Datacite"); }) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 03c3eeb3ce..9aa4e4c313 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -32,8 +32,6 @@ import org.dom4j.Document; import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -88,8 +86,6 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Map nsContext = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class); - static { nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); @@ -117,9 +113,6 @@ public abstract class AbstractMdRecordToOafMapper { } public List processMdRecord(final String xml) { - - // log.info("Processing record: " + xml); - try { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); @@ -134,7 +127,7 @@ public abstract class AbstractMdRecordToOafMapper { doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { - return null; + return Lists.newArrayList(); } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) @@ -142,7 +135,7 @@ public abstract class AbstractMdRecordToOafMapper { : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { - return null; + return Lists.newArrayList(); } final DataInfo info = prepareDataInfo(doc, invisible); @@ -161,21 +154,17 @@ public abstract class AbstractMdRecordToOafMapper { protected String getResultType(final Document doc, final List instances) { final String type = doc.valueOf("//dr:CobjCategory/@type"); - if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() - .map(s -> UNKNOWN.equalsIgnoreCase(s) ? "0000" : s) + .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) .orElse("0000"); // Unknown return Optional .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(q -> q.getClassid()) + .map(Qualifier::getClassid) .orElse("0000"); - /* - * .orElseThrow( () -> new IllegalArgumentException( String.format("'%s' not mapped in %s", instanceType, - * DNET_RESULT_TYPOLOGIES))); - */ } return type; @@ -185,7 +174,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } @@ -498,7 +487,6 @@ public abstract class AbstractMdRecordToOafMapper { accessRight.setSchemename(qualifier.getSchemename()); // TODO set the OAStatus - // accessRight.setOaStatus(...); return accessRight; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index bbfb7429f3..9027b49d75 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -127,7 +127,7 @@ public class GenerateEntitiesApplication { .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .flatMap(List::iterator)); } switch (mode) { @@ -135,7 +135,7 @@ public class GenerateEntitiesApplication { save( inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) - .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2)) + .reduceByKey(OafMapperUtils::merge) .map(Tuple2::_2), targetPath); break; @@ -191,7 +191,7 @@ public class GenerateEntitiesApplication { case "otherresearchproduct": return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + throw new IllegalArgumentException("type not managed: " + type.toLowerCase()); } } @@ -199,9 +199,9 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: " + clazz); + log.error("Error parsing object of class: {}", clazz); log.error(s); - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index d5c310c1b7..ee1b6a5da2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -40,9 +40,11 @@ public class MergeClaimsApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + Objects + .requireNonNull( + MergeClaimsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe2..b9033702dc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -9,10 +9,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -72,8 +69,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateDbEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); + Objects + .requireNonNull( + MigrateDbEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")))); parser.parseArgument(args); @@ -87,7 +86,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("postgresPassword: xxx"); final String dbSchema = parser.get("dbschema"); - log.info("dbSchema {}: " + dbSchema); + log.info("dbSchema {}: ", dbSchema); final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); @@ -659,18 +658,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); - // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); - return Arrays.asList(r1); } catch (final Exception e) { throw new RuntimeException(e); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 1d4eca2c26..4110bd8067 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -82,11 +83,11 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication public static void processPaths(final SparkSession spark, final String outputPath, final Set paths, - final String type) throws Exception { + final String type) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - log.info("Found " + paths.size() + " not empty mdstores"); + log.info("Found {} not empty mdstores", paths.size()); paths.forEach(log::info); final String[] validPaths = paths @@ -98,7 +99,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication spark .read() .parquet(validPaths) - .map((MapFunction) r -> enrichRecord(r), Encoders.STRING()) + .map((MapFunction) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .toJavaRDD() .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) // .coalesce(1) @@ -120,7 +121,9 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication final String tranDate = dateFormat.format(new Date((Long) r.getAs("dateOfTransformation"))); try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); @@ -135,8 +138,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication private static Set mdstorePaths(final String mdstoreManagerUrl, final String format, final String layout, - final String interpretation) - throws Exception { + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 3f6afbeac7..6dbab96cb2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -51,10 +51,10 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio public void execute(final String format, final String layout, final String interpretation) { final Map colls = mdstoreClient.validCollections(format, layout, interpretation); - log.info("Found " + colls.size() + " mdstores"); + log.info("Found {} mdstores", colls.size()); for (final Entry entry : colls.entrySet()) { - log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")"); + log.info("Processing mdstore {} (collection: {})", entry.getKey(), entry.getValue()); final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index d753cddeb4..2b49a9dc18 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -19,7 +19,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -56,8 +55,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); author.setPid(new ArrayList<>()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7925a78269..02aab4f162 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -13,7 +13,6 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -88,11 +87,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); if (type.toLowerCase().startsWith(ORCID)) { - final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { res.add(structuredProperty(id, MAG_PID_TYPE, info)); @@ -388,7 +387,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 5523863ff0..edfd652994 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -4,12 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index a0ce4f5a69..5d32fe926d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; @@ -34,7 +35,7 @@ public class AbstractMigrationApplication implements Closeable { this.writer = null; } - public AbstractMigrationApplication(final String hdfsPath) throws Exception { + public AbstractMigrationApplication(final String hdfsPath) throws IOException { log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath)); @@ -46,15 +47,14 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } - private Configuration getConf() throws IOException { - final Configuration conf = new Configuration(); + private Configuration getConf() { + return new Configuration(); /* * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", * org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", * org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser); * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf); */ - return conf; } protected void emit(final String s, final String type) { @@ -62,16 +62,16 @@ public class AbstractMigrationApplication implements Closeable { key.set(counter.getAndIncrement() + ":" + type); value.set(s); writer.append(key, value); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalStateException(e); } } protected void emitOaf(final Oaf oaf) { try { emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase()); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (JsonProcessingException e) { + throw new IllegalStateException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index 32f6e7abce..afaac04ea2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -68,7 +68,7 @@ public class GraphHiveImporterJobTest { } @Test - public void testImportGraphAsHiveDB() throws Exception { + void testImportGraphAsHiveDB() throws Exception { GraphHiveImporterJob .main( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b196d1948a..edcd72ab43 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -50,7 +50,7 @@ public class GraphCleaningFunctionsTest { } @Test - public void testCleaning() throws Exception { + void testCleaning() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -166,10 +166,6 @@ public class GraphCleaningFunctionsTest { // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); - - /* - * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); - */ } private Stream getAuthorPidTypes(Result pub) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 803ae0416a..697ec705f0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -10,10 +10,10 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; @Disabled -public class GenerateJsonSchema { +class GenerateJsonSchema { @Test - public void generateSchema() { + void generateSchema() { SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(Option.SCHEMA_VERSION_INDICATOR) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java index 51e4e10338..41b906e58a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java @@ -22,7 +22,7 @@ public class MakeTarTest { } @Test - public void testTar() throws IOException { + void testTar() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8ddb..03eed7a450 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -15,7 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -37,8 +37,6 @@ public class PrepareResultProjectJobTest { private static final Logger log = LoggerFactory .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); - private static final HashMap map = new HashMap<>(); - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -69,7 +67,7 @@ public class PrepareResultProjectJobTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match") @@ -90,12 +88,12 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertEquals(0, verificationDataset.count()); + assertEquals(0, verificationDataset.count()); } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one") @@ -116,12 +114,11 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 1); + assertEquals(1, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); verificationDataset.createOrReplaceTempView("table"); @@ -131,14 +128,14 @@ public class PrepareResultProjectJobTest { "from table " + "lateral view explode (projectsList) pl as projList"); - Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); verificationDataset.show(false); } @Test - public void testMatch() throws Exception { + void testMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match") @@ -159,16 +156,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -177,62 +172,54 @@ public class PrepareResultProjectJobTest { + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a0..98902b618e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -14,12 +14,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -66,7 +67,7 @@ public class QueryInformationSystemTest { private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException { + public void setUp() throws ISLookUpException, DocumentException, SAXException { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); @@ -74,13 +75,13 @@ public class QueryInformationSystemTest { } @Test - public void testSize() throws ISLookUpException { + void testSize() throws ISLookUpException { Assertions.assertEquals(23, map.size()); } @Test - public void testContent() { + void testContent() { Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a7..2ea4bc91ac 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee08..a164593ec3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -68,7 +68,7 @@ public class UpdateProjectInfoTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 05dc423cbe..8d06758a81 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -32,7 +32,7 @@ public class ZenodoUploadTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { CommunityMap communityMap = new CommunityMap(); communityMap.put("ni", "Neuroinformatics"); communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); @@ -86,7 +86,7 @@ public class ZenodoUploadTest { } @Test - public void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -137,7 +137,7 @@ public class ZenodoUploadTest { } @Test - public void readCommunityMap() throws IOException { + void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); System.out .println( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd18943..f881e6b308 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -86,7 +86,7 @@ public class CreateEntityTest { } @Test - public void test1() throws ISLookUpException, IOException { + void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); @@ -144,7 +144,7 @@ public class CreateEntityTest { @Test @Disabled - public void test2() throws IOException, ISLookUpException { + void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d66..69e550d45b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.utils.DHPUtils; -public class CreateRelationTest { +class CreateRelationTest { List communityContext = Arrays .asList( @@ -473,7 +473,7 @@ public class CreateRelationTest { } @Test - public void test1() { + void test1() { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 3d42f124e3..e43383ef4c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -60,7 +60,7 @@ public class ExtractRelationFromEntityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java index 75d5a26735..eb5919fd5d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java @@ -4,13 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder; -public class FunderParsingTest { +class FunderParsingTest { @Test - public void testFunderTwoLevels() throws DocumentException { + void testFunderTwoLevels() throws DocumentException { String funding_Stream = "nsf_________::NSFNSFNational Science " + @@ -37,7 +38,7 @@ public class FunderParsingTest { } @Test - public void testFunderThreeeLevels() throws DocumentException { + void testFunderThreeeLevels() throws DocumentException, SAXException { String funding_stream = "ec__________::EC" + "EC" + "European Commission" + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138a..0499597046 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -17,7 +17,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -513,7 +513,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeEntity() throws ISLookUpException { + void testSizeEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -523,7 +523,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeRelation() throws ISLookUpException { + void testSizeRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -534,7 +534,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentRelation() throws ISLookUpException { + void testContentRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -572,7 +572,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentEntity() throws ISLookUpException { + void testContentEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index ea2dc73caf..50a9f26b47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -69,7 +69,7 @@ public class RelationFromOrganizationTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab36..d1e3b3acc5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -69,7 +69,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") @@ -102,7 +102,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d9421..8ac0c552f4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -65,7 +65,7 @@ public class SplitPerFunderTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java index 0089811cf8..2d28ee3054 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; -public class MergeGraphTableSparkJobTest { +class MergeGraphTableSparkJobTest { private ObjectMapper mapper; @@ -25,7 +25,7 @@ public class MergeGraphTableSparkJobTest { } @Test - public void testMergeDatasources() throws IOException { + void testMergeDatasources() throws IOException { assertEquals( "openaire-cris_1.1", MergeGraphTableSparkJob diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index e0d202209f..1e974dd692 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -24,7 +24,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class GenerateEntitiesApplicationTest { +class GenerateEntitiesApplicationTest { @Mock private ISLookUpService isLookUpService; @@ -44,7 +44,7 @@ public class GenerateEntitiesApplicationTest { } @Test - public void testMergeResult() throws IOException { + void testMergeResult() throws IOException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c431b4dd82..000dbfe25d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -21,7 +22,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class MappersTest { +class MappersTest { @Mock private ISLookUpService isLookUpService; @@ -50,7 +50,7 @@ public class MappersTest { @Test void testPublication() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -71,7 +71,7 @@ public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); - assertTrue(p.getSource().size() == 1); + assertEquals(1, p.getSource().size()); assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); @@ -88,7 +88,7 @@ public class MappersTest { .getPid() .stream() .findFirst() - .get(); + .orElseThrow(() -> new IllegalStateException("missing author pid")); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -108,7 +108,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -141,17 +140,15 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); - // System.out.println(new ObjectMapper().writeValueAsString(p)); - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testPublication_PubMed() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record_pubmed.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -196,23 +193,22 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(p.getPid().size() > 0); - assertEquals(p.getPid().get(0).getValue(), "PMC1517292"); - assertEquals(p.getPid().get(0).getQualifier().getClassid(), "pmc"); + assertEquals("PMC1517292", p.getPid().get(0).getValue()); + assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); - assertTrue(p.getInstance().get(0).getPid().size() == 2); + assertEquals(2, p.getInstance().get(0).getPid().size()); - assertTrue(p.getInstance().get(0).getAlternateIdentifier().size() == 1); + assertEquals(1, p.getInstance().get(0).getAlternateIdentifier().size()); assertEquals("doi", p.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("10.3897/oneeco.2.e13718", p.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); @@ -223,7 +219,7 @@ public class MappersTest { @Test void testPublicationInvisible() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); @@ -238,7 +234,7 @@ public class MappersTest { @Test void testDataset() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -281,12 +277,13 @@ public class MappersTest { .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - final StructuredProperty pid = author + final Optional oPid = author .get() .getPid() .stream() - .findFirst() - .get(); + .findFirst(); + assertTrue(oPid.isPresent()); + final StructuredProperty pid = oPid.get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -315,7 +312,6 @@ public class MappersTest { assertTrue(d.getInstance().size() > 0); d .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -345,13 +341,14 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testOdfBielefeld() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_bielefeld.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -389,7 +386,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -399,7 +395,8 @@ public class MappersTest { @Test void testOpentrial() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -444,7 +441,7 @@ public class MappersTest { assertEquals(1, d.getDescription().size()); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); - assertTrue(d.getAuthor().size() == 1); + assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Kristian K.", d.getAuthor().get(0).getName()); assertEquals("Jensen", d.getAuthor().get(0).getSurname()); @@ -462,7 +459,7 @@ public class MappersTest { assertTrue(d.getContext().isEmpty()); assertNotNull(d.getInstance()); - assertTrue(d.getInstance().size() == 1); + assertEquals(1, d.getInstance().size()); final Instance i = d.getInstance().get(0); @@ -515,7 +512,7 @@ public class MappersTest { @Test void testSoftware() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -532,22 +529,15 @@ public class MappersTest { assertTrue(s.getInstance().size() > 0); } - // @Test - void testDataset_2() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset_2.xml")); - - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - } - @Test void testClaimDedup() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(list); + assertFalse(list.isEmpty()); + System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); @@ -555,7 +545,7 @@ public class MappersTest { @Test void testNakala() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -583,7 +573,7 @@ public class MappersTest { @Test void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -608,7 +598,8 @@ public class MappersTest { @Test void testClaimFromCrossref() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -624,7 +615,7 @@ public class MappersTest { @Test void testODFRecord() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); @@ -638,7 +629,7 @@ public class MappersTest { @Test void testTextGrid() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -652,9 +643,9 @@ public class MappersTest { assertEquals(1, p.getAuthor().size()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertTrue(p.getPid().size() == 1); + assertEquals(1, p.getPid().size()); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); - assertTrue(PidType.handle.equals(PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()))); + assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("dataset", p.getResulttype().getClassname()); assertEquals(1, p.getInstance().size()); @@ -672,7 +663,7 @@ public class MappersTest { @Test void testBologna() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -689,7 +680,7 @@ public class MappersTest { @Test void testJairo() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -702,7 +693,7 @@ public class MappersTest { assertNotNull(p.getTitle()); assertFalse(p.getTitle().isEmpty()); - assertTrue(p.getTitle().size() == 1); + assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); final Publication p_cleaned = cleanup(fixVocabularyNames(p)); @@ -713,7 +704,8 @@ public class MappersTest { @Test void testOdfFromHdfs() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -749,7 +741,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("UNKNOWN", i.getAccessright().getClassid()); @@ -768,13 +759,16 @@ public class MappersTest { private List vocs() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + Objects + .requireNonNull(MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"))); } private List synonyms() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + Objects + .requireNonNull( + MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb28..69943435a9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -32,7 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) -public class MigrateDbEntitiesApplicationTest { +class MigrateDbEntitiesApplicationTest { private MigrateDbEntitiesApplication app; @@ -58,7 +58,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasource() throws Exception { + void testProcessDatasource() throws Exception { final List fields = prepareMocks("datasources_resultset_entry.json"); final List list = app.processDatasource(rs); @@ -81,7 +81,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProject() throws Exception { + void testProcessProject() throws Exception { final List fields = prepareMocks("projects_resultset_entry.json"); final List list = app.processProject(rs); @@ -99,7 +99,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessOrganization() throws Exception { + void testProcessOrganization() throws Exception { final List fields = prepareMocks("organizations_resultset_entry.json"); final List list = app.processOrganization(rs); @@ -126,7 +126,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasourceOrganization() throws Exception { + void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); final List list = app.processDatasourceOrganization(rs); @@ -143,7 +143,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProjectOrganization() throws Exception { + void testProcessProjectOrganization() throws Exception { final List fields = prepareMocks("projectorganization_resultset_entry.json"); final List list = app.processProjectOrganization(rs); @@ -162,7 +162,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_context() throws Exception { + void testProcessClaims_context() throws Exception { final List fields = prepareMocks("claimscontext_resultset_entry.json"); final List list = app.processClaims(rs); @@ -177,7 +177,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_rels() throws Exception { + void testProcessClaims_rels() throws Exception { final List fields = prepareMocks("claimsrel_resultset_entry.json"); final List list = app.processClaims(rs); @@ -208,9 +208,6 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(r1.getCollectedfrom().get(0).getKey()); assertValidId(r2.getCollectedfrom().get(0).getKey()); - - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { @@ -273,7 +270,7 @@ public class MigrateDbEntitiesApplicationTest { final String[] values = ((List) tf.getValue()) .stream() .filter(Objects::nonNull) - .map(o -> o.toString()) + .map(Object::toString) .toArray(String[]::new); Mockito.when(arr.getArray()).thenReturn(values); @@ -334,6 +331,7 @@ public class MigrateDbEntitiesApplicationTest { return new Float(getValueAs(name, fields).toString()); } + @SuppressWarnings("unchecked") private T getValueAs(final String name, final List fields) { return fields .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java index fb2c90e5c5..3b9616de37 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java @@ -26,8 +26,6 @@ import io.fares.junit.mongodb.MongoForAllExtension; @Disabled public class MigrateMongoMdstoresApplicationTest { - private static final Logger log = LoggerFactory.getLogger(MigrateMongoMdstoresApplicationTest.class); - public static final String COLL_NAME = "9eed8a4d-bb41-47c3-987f-9d06aee0dec0::1453898911558"; @RegisterExtension diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index 3fd3654161..c8158c044e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -75,7 +75,7 @@ public class PatchRelationApplicationTest { } @Test - public void testPatchRelationApplication() throws Exception { + void testPatchRelationApplication() throws Exception { final String graphBasePath = workingDir.toString() + "/graphBasePath"; PatchRelationsApplication.main(new String[] { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java index 110fabf450..ec059ad735 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java @@ -9,7 +9,7 @@ import java.util.List; import org.junit.jupiter.api.Test; -public class ReflectionTest { +class ReflectionTest { private final Cleaner cleaner = new Cleaner(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bdd..3301577eea 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -46,9 +46,11 @@ public class CreateRelatedEntitiesJob_phase1 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase1.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -146,7 +148,11 @@ public class CreateRelatedEntitiesJob_phase1 { Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { - final StructuredProperty title = result.getTitle().stream().findFirst().get(); + final StructuredProperty title = result + .getTitle() + .stream() + .findFirst() + .orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId())); title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH)); re.setTitle(title); } @@ -196,7 +202,7 @@ public class CreateRelatedEntitiesJob_phase1 { List> f = p.getFundingtree(); if (!f.isEmpty()) { - re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); + re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList())); } break; } @@ -211,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase1 { return Optional .ofNullable(f) .filter(Objects::nonNull) - .map(x -> x.getValue()) + .map(Field::getValue) .orElse(defaultValue); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index c013a2bf69..85fb4a6b2e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -49,9 +49,11 @@ public class CreateRelatedEntitiesJob_phase2 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index b3f7854924..ae899c3d85 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,14 +131,14 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) - .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); JavaRDD pruned = pruneRels( pruneRels( rels, - sourceMaxRelations, relPartitions, (Function) r -> r.getSource()), - targetMaxRelations, relPartitions, (Function) r -> r.getTarget()); + sourceMaxRelations, relPartitions, (Function) Relation::getSource), + targetMaxRelations, relPartitions, (Function) Relation::getTarget); spark .createDataset(pruned.rdd(), Encoders.bean(Relation.class)) .repartition(relPartitions) @@ -170,8 +170,8 @@ public class PrepareRelationsJob { .map( (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), Encoders.kryo(Relation.class)) - .filter((FilterFunction) rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter((FilterFunction) rel -> relationFilter.contains(rel.getRelClass()) == false) + .filter((FilterFunction) rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) Relation::getSource, Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 28c1111d67..01d161b6b1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.oa.provision; public class ProvisionConstants { + private ProvisionConstants() { + } + public static final String LAYOUT = "index"; public static final String INTERPRETATION = "openaire"; public static final String SEPARATOR = "-"; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 410aff5bad..0033978bf8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -17,7 +17,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class SolrAdminApplication implements Closeable { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446f..b383f67efa 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -40,7 +39,7 @@ public class XmlConverterJob { private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class); - public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; + public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; public static void main(String[] args) throws Exception { @@ -95,7 +94,7 @@ public class XmlConverterJob { prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, + SCHEMA_LOCATION, otherDsTypeId); final List paths = HdfsSupport @@ -186,7 +185,7 @@ public class XmlConverterJob { accumulators .put( "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + sc.longAccumulator("organizationOrganization_dedup_merges")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index a321bdba9c..e7dbdbd2b2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -3,14 +3,12 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; -import javax.swing.text.html.Option; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamResult; @@ -163,7 +161,7 @@ public class XmlIndexingJob { case HDFS: spark .createDataset( - docs.map(s -> new SerializableSolrInputDocument(s)).rdd(), + docs.map(SerializableSolrInputDocument::new).rdd(), Encoders.kryo(SerializableSolrInputDocument.class)) .write() .mode(SaveMode.Overwrite) @@ -174,14 +172,13 @@ public class XmlIndexingJob { } } - protected static String toIndexRecord(Transformer tr, final String record) { + protected static String toIndexRecord(Transformer tr, final String xmlRecord) { final StreamResult res = new StreamResult(new StringWriter()); try { - tr.transform(new StreamSource(new StringReader(record)), res); + tr.transform(new StreamSource(new StringReader(xmlRecord)), res); return res.getWriter().toString(); - } catch (Throwable e) { - log.error("XPathException on record: \n {}", record, e); - throw new IllegalArgumentException(e); + } catch (TransformerException e) { + throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e); } } @@ -192,8 +189,6 @@ public class XmlIndexingJob { * @param xslt xslt for building the index record transformer * @param fields the list of fields * @return the javax.xml.transform.Transformer - * @throws ISLookUpException could happen - * @throws IOException could happen * @throws TransformerException could happen */ protected static String getLayoutTransformer(String format, String fields, String xslt) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index 2eb9cf38be..0fb109fbb5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.provision.model; import java.io.Serializable; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index c09ed86e54..d4ee24c141 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -11,6 +11,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class ProvisionModelSupport { + private ProvisionModelSupport() { + } + public static Class[] getModelClasses() { List> modelClasses = Lists.newArrayList(ModelSupport.getOafModelClasses()); modelClasses diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java index e15ceff760..5c78d1826a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java @@ -34,7 +34,6 @@ public class RelatedEntity implements Serializable { private Qualifier datasourcetype; private Qualifier datasourcetypeui; private Qualifier openairecompatibility; - // private String aggregatortype; // organization private String legalname; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java index 7391569ed9..a910504032 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -5,7 +5,6 @@ import java.util.Comparator; import java.util.Optional; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index ac418f2b9b..bcaf406039 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -9,6 +9,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -23,7 +24,7 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; public static ContextMapper fromIS(final String isLookupUrl) - throws DocumentException, ISLookUpException { + throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); StringBuilder sb = new StringBuilder(""); Joiner.on("").appendTo(sb, isLookUp.quickSearchProfile(XQUERY)); @@ -31,10 +32,12 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } - public static ContextMapper fromXml(final String xml) throws DocumentException { + public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); for (Object o : doc.selectNodes("//entry")) { Node node = (Node) o; String id = node.valueOf("./@id"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index d2131ef28f..3750d0173d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -8,16 +8,18 @@ import java.util.Set; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets + public static final Set authorPidTypes = Sets .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); + private GraphMappingUtils() { + } + public static String removePrefix(final String s) { if (s.contains("|")) return substringAfter(s, "|"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java index 29a51cb29d..8c7c61361e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java @@ -25,11 +25,9 @@ public class ISLookupClient { * * @param format the Metadata format name * @return the string representation of the list of fields to be indexed - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ - public String getLayoutSource(final String format) - throws ISLookUpDocumentNotFoundException, ISLookUpException { + public String getLayoutSource(final String format) throws ISLookUpException { return doLookup( String .format( @@ -41,7 +39,6 @@ public class ISLookupClient { * Method retrieves from the information system the openaireLayoutToRecordStylesheet * * @return the string representation of the XSLT contained in the transformation rule profile - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ public String getLayoutTransformer() throws ISLookUpException { @@ -78,9 +75,9 @@ public class ISLookupClient { } private String doLookup(String xquery) throws ISLookUpException { - log.info(String.format("running xquery: %s", xquery)); + log.info("running xquery: {}", xquery); final String res = getIsLookup().getResourceProfileByQuery(xquery); - log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ...")); + log.info("got response (100 chars): {} ...", StringUtils.left(res, 100)); return res; } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java deleted file mode 100644 index 9dbac1936c..0000000000 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.oa.provision.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index f16ee260fe..36028be9e1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -3,10 +3,10 @@ package eu.dnetlib.dhp.oa.provision.utils; import java.io.StringReader; import java.io.StringWriter; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Objects; import javax.xml.stream.*; import javax.xml.stream.events.Namespace; @@ -57,13 +57,13 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; private final ThreadLocal inputFactory = ThreadLocal - .withInitial(() -> XMLInputFactory.newInstance()); + .withInitial(XMLInputFactory::newInstance); private final ThreadLocal outputFactory = ThreadLocal - .withInitial(() -> XMLOutputFactory.newInstance()); + .withInitial(XMLOutputFactory::newInstance); private final ThreadLocal eventFactory = ThreadLocal - .withInitial(() -> XMLEventFactory.newInstance()); + .withInitial(XMLEventFactory::newInstance); private final String version; @@ -126,6 +126,8 @@ public class StreamingInputDocumentFactory { return indexDocument; } catch (XMLStreamException e) { throw new IllegalStateException(e); + } finally { + inputFactory.remove(); } } @@ -143,9 +145,9 @@ public class StreamingInputDocumentFactory { /** * Parse the targetFields block and add fields to the solr document. * - * @param indexDocument - * @param parser - * @throws XMLStreamException + * @param indexDocument the document being populated + * @param parser the XML parser + * @throws XMLStreamException when the parser cannot parse the XML */ protected void parseTargetFields( final SolrInputDocument indexDocument, final XMLEventReader parser) @@ -165,9 +167,10 @@ public class StreamingInputDocumentFactory { final XMLEvent text = parser.nextEvent(); String data = getText(text); - - addField(indexDocument, fieldName, data); - hasFields = true; + if (Objects.nonNull(data)) { + addField(indexDocument, fieldName, data); + hasFields = true; + } } } @@ -192,36 +195,43 @@ public class StreamingInputDocumentFactory { final List nsList, final String dnetResult) throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + final XMLEventFactory xmlEventFactory = this.eventFactory.get(); + try { - for (Namespace ns : nsList) { - eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); - } - - StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); - - // new root record - writer.add(newRecord); - - // copy the rest as it is - while (parser.hasNext()) { - final XMLEvent resultEvent = parser.nextEvent(); - - // TODO: replace with depth tracking instead of close tag tracking. - if (resultEvent.isEndElement() - && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { - writer.add(eventFactory.get().createEndElement("", null, RESULT)); - break; + for (Namespace ns : nsList) { + xmlEventFactory.createNamespace(ns.getPrefix(), ns.getNamespaceURI()); } - writer.add(resultEvent); + StartElement newRecord = xmlEventFactory.createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(xmlEventFactory.createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } finally { + outputFactory.remove(); + eventFactory.remove(); } - writer.close(); - indexDocument.addField(INDEX_RESULT, results.toString()); } /** - * Helper used to add a field to a solr doc. It avoids to add empy fields + * Helper used to add a field to a solr doc, avoids adding empty fields * * @param indexDocument * @param field @@ -231,7 +241,6 @@ public class StreamingInputDocumentFactory { final SolrInputDocument indexDocument, final String field, final String value) { String cleaned = value.trim(); if (!cleaned.isEmpty()) { - // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); indexDocument.addField(field.toLowerCase(), cleaned); } } @@ -243,9 +252,9 @@ public class StreamingInputDocumentFactory { * @return the */ protected final String getText(final XMLEvent text) { - if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + - // text.asEndElement().getName().getLocalPart()); + if (text.isEndElement()) { return ""; + } final String data = text.asCharacters().getData(); if (data != null && data.length() > MAX_FIELD_LENGTH) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 173ba326a3..7487f09560 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -50,7 +50,7 @@ public class TemplateFactory { public String getChild(final String name, final String id, final List metadata) { return getTemplate(resources.getChild()) .add("name", name) - .add("hasId", !(id == null)) + .add("hasId", id != null) .add("id", id != null ? escapeXml(removePrefix(id)) : "") .add("metadata", metadata) .render(); @@ -103,7 +103,7 @@ public class TemplateFactory { (webresources != null ? webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) - .map(w -> getWebResource(w)) + .map(this::getWebResource) .collect(Collectors.toList())) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 2c82402900..631335376f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -25,8 +25,8 @@ import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; +import org.xml.sax.SAXException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; @@ -46,6 +46,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { + public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; private final Map accumulators; private final Set specialDatasourceTypes; @@ -56,8 +57,6 @@ public class XmlRecordFactory implements Serializable { private boolean indent = false; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, @@ -86,7 +85,6 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); - // final OafEntity entity = toOafEntity(je.getEntity()); OafEntity entity = je.getEntity(); TemplateFactory templateFactory = new TemplateFactory(); try { @@ -95,8 +93,7 @@ public class XmlRecordFactory implements Serializable { final List metadata = metadata(type, entity, contexts); // rels has to be processed before the contexts because they enrich the contextMap with - // the - // funding info. + // the funding info. final List links = je.getLinks(); final List relations = links .stream() @@ -122,34 +119,11 @@ public class XmlRecordFactory implements Serializable { } } - private static OafEntity parseOaf(final String json, final String type) { - try { - switch (EntityType.valueOf(type)) { - case publication: - return OBJECT_MAPPER.readValue(json, Publication.class); - case dataset: - return OBJECT_MAPPER.readValue(json, Dataset.class); - case otherresearchproduct: - return OBJECT_MAPPER.readValue(json, OtherResearchProduct.class); - case software: - return OBJECT_MAPPER.readValue(json, Software.class); - case datasource: - return OBJECT_MAPPER.readValue(json, Datasource.class); - case organization: - return OBJECT_MAPPER.readValue(json, Organization.class); - case project: - return OBJECT_MAPPER.readValue(json, Project.class); - default: - throw new IllegalArgumentException("invalid type: " + type); - } - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - private String printXML(String xml, boolean indent) { try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document doc = reader.read(new StringReader(xml)); OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); @@ -157,7 +131,7 @@ public class XmlRecordFactory implements Serializable { XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); - } catch (IOException | DocumentException e) { + } catch (IOException | DocumentException | SAXException e) { throw new IllegalArgumentException("Unable to indent XML. Invalid record:\n" + xml, e); } } @@ -203,7 +177,7 @@ public class XmlRecordFactory implements Serializable { final Result r = (Result) entity; if (r.getContext() != null) { - contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); + contexts.addAll(r.getContext().stream().map(Context::getId).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ if (contexts.contains("dh-ch::subcommunity::2")) { contexts.add("clarin"); @@ -260,14 +234,14 @@ public class XmlRecordFactory implements Serializable { .collect( Collectors .groupingBy( - p -> p.getValue(), + StructuredProperty::getValue, Collectors .mapping( p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) .values() .stream() - .map(op -> op.get()) + .map(Optional::get) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); @@ -938,7 +912,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .filter(Objects::nonNull) - .map(ft -> ft.getValue()) + .map(Field::getValue) .collect(Collectors.toList())); } @@ -1069,7 +1043,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .peek(ft -> fillContextMap(ft, contexts)) - .map(ft -> getRelFundingTree(ft)) + .map(XmlRecordFactory::getRelFundingTree) .collect(Collectors.toList())); } break; @@ -1112,7 +1086,7 @@ public class XmlRecordFactory implements Serializable { final List links = je.getLinks(); List children = links .stream() - .filter(link -> isDuplicate(link)) + .filter(this::isDuplicate) .map(link -> { final String targetType = link.getTarget().getType(); final String name = ModelSupport.getMainType(EntityType.valueOf(targetType)); @@ -1263,7 +1237,7 @@ public class XmlRecordFactory implements Serializable { return extraInfo != null ? extraInfo .stream() - .map(e -> XmlSerializationUtils.mapExtraInfo(e)) + .map(XmlSerializationUtils::mapExtraInfo) .collect(Collectors.toList()) : Lists.newArrayList(); } @@ -1287,9 +1261,6 @@ public class XmlRecordFactory implements Serializable { if (def == null) { continue; - // throw new IllegalStateException(String.format("cannot find context for id - // '%s'", - // id)); } if (def.getName().equals("context")) { @@ -1327,7 +1298,9 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature(DISALLOW_DOCTYPE_DECL, true); + Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; } catch (TransformerConfigurationException e) { @@ -1354,8 +1327,10 @@ public class XmlRecordFactory implements Serializable { Document fundingPath; try { - fundingPath = new SAXReader().read(new StringReader(xmlTree)); - } catch (final DocumentException e) { + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + fundingPath = reader.read(new StringReader(xmlTree)); + } catch (final DocumentException | SAXException e) { throw new RuntimeException(e); } try { @@ -1407,7 +1382,9 @@ public class XmlRecordFactory implements Serializable { protected static String getRelFundingTree(final String xmlTree) { String funding = ""; try { - final Document ftree = new SAXReader().read(new StringReader(xmlTree)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document ftree = reader.read(new StringReader(xmlTree)); funding = ""; funding += getFunderElement(ftree); @@ -1427,7 +1404,7 @@ public class XmlRecordFactory implements Serializable { + e.getName() + ">"; } - } catch (final DocumentException e) { + } catch (final DocumentException | SAXException e) { throw new IllegalArgumentException( "unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); } finally { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 8195467b16..213a62b32f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -11,9 +11,12 @@ public class XmlSerializationUtils { // XML 1.0 // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - private static final String xml10pattern = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + private static final String XML_10_PATTERN = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]"; + private XmlSerializationUtils() { + } + public static String mapJournal(Journal j) { final String attrs = new StringBuilder() .append(attr("issn", j.getIssnPrinted())) @@ -50,12 +53,12 @@ public class XmlSerializationUtils { public static String escapeXml(final String value) { return value - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">") - .replaceAll("\"", """) - .replaceAll("'", "'") - .replaceAll(xml10pattern, ""); + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("\"", """) + .replace("'", "'") + .replaceAll(XML_10_PATTERN, ""); } public static String parseDataInfo(final DataInfo dataInfo) { @@ -70,18 +73,6 @@ public class XmlSerializationUtils { .toString(); } - private static StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo info) { - return sb - .append( - attr("inferred", info.getInferred() != null ? info.getInferred().toString() : "")) - .append(attr("inferenceprovenance", info.getInferenceprovenance())) - .append( - attr( - "provenanceaction", - info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "")) - .append(attr("trust", info.getTrust())); - } - public static String mapKeyValue(final String name, final KeyValue kv) { return new StringBuilder() .append("<") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java index 6cec3ed53e..903150ca8d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java @@ -25,7 +25,7 @@ public class ZkServers { // quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire String urls = zkUrl; final Optional chRoot = Optional.of(SEPARATOR + StringUtils.substringAfterLast(zkUrl, SEPARATOR)); - if (chRoot.isPresent() && StringUtils.isNotBlank(chRoot.get())) { + if (StringUtils.isNotBlank(chRoot.get())) { log.debug(String.format("found zk chroot %s", chRoot)); urls = zkUrl.replace(chRoot.get(), ""); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index ffeb0995d1..e5faccd0f3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -45,6 +45,7 @@ public class DropAndCreateESIndex { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final String ip = clusterMap.get(cluster).split(",")[0]; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java index 3f842ef34c..dd08215d54 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java @@ -44,6 +44,7 @@ public class SparkIndexCollectionOnES { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") final Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe48..9f022454be 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; +import java.util.Objects; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +16,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -35,7 +33,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; * * The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities. */ -public class IndexRecordTransformerTest { +class IndexRecordTransformerTest { public static final String VERSION = "2021-04-15T10:05:53Z"; public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; @@ -48,23 +46,23 @@ public class IndexRecordTransformerTest { } @Test - public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + void testPreBuiltRecordTransformation() throws IOException, TransformerException { + String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml"))); testRecordTransformation(record); } @Test - public void testPublicationRecordTransformation() throws IOException, TransformerException { + void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, XmlRecordFactoryTest.otherDsTypeId); Publication p = load("publication.json", Publication.class); Project pj = load("project.json", Project.class); Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -80,8 +78,9 @@ public class IndexRecordTransformerTest { } private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml"))); + String xslt = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); @@ -99,7 +98,7 @@ public class IndexRecordTransformerTest { private T load(String fileName, Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); + .readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index 6818cf6a51..c22a24185b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -62,7 +65,7 @@ public class PrepareRelationsJobTest { } @Test - public void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { + void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { final int maxRelations = 20; PrepareRelationsJob @@ -83,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - Assertions.assertEquals(maxRelations, out.count()); + assertEquals(maxRelations, out.count()); Dataset freq = out .toDF() @@ -97,13 +100,13 @@ public class PrepareRelationsJobTest { long participation = getRows(freq, PARTICIPATION).get(0).getAs("count"); long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); - Assertions.assertTrue(participation == outcome); - Assertions.assertTrue(outcome > affiliation); - Assertions.assertTrue(participation > affiliation); + assertEquals(outcome, participation); + assertTrue(outcome > affiliation); + assertTrue(participation > affiliation); - Assertions.assertEquals(7, outcome); - Assertions.assertEquals(7, participation); - Assertions.assertEquals(6, affiliation); + assertEquals(7, outcome); + assertEquals(7, participation); + assertEquals(6, affiliation); } protected List getRows(Dataset freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index f57b8dcafd..9d5bff3cf3 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -1,39 +1,42 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class SolrAdminApplicationTest extends SolrTest { +class SolrAdminApplicationTest extends SolrTest { @Test - public void testPing() throws Exception { + void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assertions.assertTrue(pingResponse.getStatus() == 0); + assertEquals(0, pingResponse.getStatus()); } @Test - public void testAdminApplication_DELETE() throws Exception { + void testAdminApplication_DELETE() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } @Test - public void testAdminApplication_COMMIT() throws Exception { + void testAdminApplication_COMMIT() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 72f28fdf2d..dc0a40471d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -28,7 +28,6 @@ public class SortableRelationKeyTest { .map(r -> SortableRelationKey.create(r, r.getSource())) .sorted() .forEach( - it -> { try { System.out.println(mapper.writeValueAsString(it)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index d7bcb31851..6f19565784 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -89,7 +89,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_onSolr() throws Exception { + void testXmlIndexingJob_onSolr() throws Exception { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; @@ -112,7 +112,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_saveOnHDFS() throws Exception { + void testXmlIndexingJob_saveOnHDFS() throws Exception { final String ID_XPATH = "//header/*[local-name()='objIdentifier']"; String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 221049f903..d89b3b0684 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -13,6 +13,7 @@ import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -21,7 +22,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; -public class XmlRecordFactoryTest { +class XmlRecordFactoryTest { public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -37,11 +37,11 @@ public class XmlRecordFactoryTest { .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + void testXMLRecordFactory() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -72,11 +72,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -104,11 +104,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -135,7 +135,7 @@ public class XmlRecordFactoryTest { } @Test - public void testEnermapsRecord() throws IOException, DocumentException { + void testEnermapsRecord() throws IOException, DocumentException, SAXException { String contextmap = "" + @@ -144,7 +144,7 @@ public class XmlRecordFactoryTest { ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Dataset d = OBJECT_MAPPER diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java index 5b2e6804b9..93f1bc0877 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java @@ -7,16 +7,8 @@ package eu.dnetlib.oa.graph.usagerawdata.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; -import java.util.Properties; -import org.apache.log4j.Logger; - -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -31,7 +23,9 @@ public abstract class ConnectDB { private static String dbImpalaUrl; private static String usageStatsDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -72,10 +66,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -97,10 +87,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java index e53709f1a5..afd7f9807f 100644 --- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java @@ -7,20 +7,14 @@ package eu.dnetlib.oa.graph.usagestatsbuild.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import java.util.Properties; import org.apache.log4j.Logger; -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -37,7 +31,9 @@ public abstract class ConnectDB { private static String usageStatsDBSchema; private static String usagestatsPermanentDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -94,10 +90,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -119,10 +111,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); From 61d811ba531ab79ce314f799fa8471007f7dd03c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:18:20 +0200 Subject: [PATCH 088/162] suggestions from intellij --- .../eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 2 +- .../dhp/oa/graph/raw/PatchRelationApplicationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index 6c309acb3b..cc36421a0a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; public class EXCELParserTest { private static Path workingDir; - private HttpConnector2 httpConnector = new HttpConnector2(); + private final HttpConnector2 httpConnector = new HttpConnector2(); private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index c8158c044e..c9c32edd96 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -81,8 +81,8 @@ public class PatchRelationApplicationTest { PatchRelationsApplication.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphBasePath", graphBasePath, - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + "-workingDir", workingDir + "/workingDir", + "-idMappingPath", workingDir + "/" + ID_MAPPING_PATH }); final List rels = spark From 9f4db73f30981aafb57440a3332c4095ef30f8d9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 15:02:51 +0200 Subject: [PATCH 089/162] updated/fixed unit tests --- .../collection/plugin/rest/RestIterator.java | 1 - .../oa/dedup/DispatchEntitiesSparkJob.java | 1 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 6 ++-- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 29 +++++++++++-------- .../doiboost/orcid/OrcidClientTest.java | 1 + .../orcidnodoi/PublicationToOafTest.java | 4 +-- .../graph/dump/community/CommunitySplit.java | 28 ++++++++++-------- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 7 +++-- .../oa/provision/utils/XmlRecordFactory.java | 1 - 10 files changed, 45 insertions(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 67ec22b465..a90d259b44 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -132,7 +132,6 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 2cdc221538..ea738836b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -57,6 +57,7 @@ public class DispatchEntitiesSparkJob { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 1860a3ff0e..2f992bd78a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -195,7 +195,7 @@ public class SparkDedupTest implements Serializable { assertEquals(3082, orgs_simrel); assertEquals(7036, pubs_simrel); - assertEquals(344, sw_simrel); + assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6750, orp_simrel); } @@ -346,7 +346,7 @@ public class SparkDedupTest implements Serializable { assertEquals(1272, orgs_mergerel); assertEquals(1438, pubs_mergerel); - assertEquals(288, sw_mergerel); + assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(718, orp_mergerel); @@ -535,7 +535,7 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4862, relations); + assertEquals(4860, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 2efbe260af..1ba2c717ce 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -41,7 +41,7 @@ public class SparkStatsTest implements Serializable { private static final String testActionSetId = "test-orchestrator"; @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void beforeAll() throws IOException, URISyntaxException { testGraphBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) @@ -73,7 +73,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); @@ -82,7 +82,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); @@ -91,7 +91,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); @@ -100,7 +100,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); @@ -109,7 +109,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); @@ -118,7 +118,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); } @@ -129,7 +129,7 @@ public class SparkStatsTest implements Serializable { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/createBlockStats_parameters.json"))); parser @@ -168,10 +168,15 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(549, orgs_blocks); - assertEquals(299, pubs_blocks); + assertEquals(477, orgs_blocks); + assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); - assertEquals(186, ds_blocks); - assertEquals(170, orp_blocks); + assertEquals(191, ds_blocks); + assertEquals(171, orp_blocks); + } + + @AfterAll + public static void tearDown() { + spark.close(); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 82577e0d8b..9ea7c69595 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -160,6 +160,7 @@ public class OrcidClientTest { } @Test + @Disabled void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 1dbb37fca9..54c16b5d78 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static org.junit.jupiter.api.Assertions.*; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,14 +15,13 @@ import com.google.gson.JsonParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import jdk.nashorn.internal.ir.annotations.Ignore; class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test - @Ignore + @Disabled void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index a9c0770a80..b92eb3e602 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -48,29 +49,32 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .stream() .forEach(c -> printResult(c, result, outputPath)); } - private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result - .filter((FilterFunction) r -> containsCommunity(r, c)); + private static void printResult(String community, Dataset result, String outputPath) { + Dataset communityProducts = result + .filter((FilterFunction) r -> containsCommunity(r, community)); - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); + try { + communityProducts.first(); + communityProducts + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + community); + } catch (NoSuchElementException e) { + // ignoring it on purpose + } } - private static boolean containsCommunity(CommunityResult r, String c) { + private static boolean containsCommunity(CommunityResult r, String community) { if (Optional.ofNullable(r.getContext()).isPresent()) { return !r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) + .filter(con -> con.getCode().equals(community)) .collect(Collectors.toList()) .isEmpty(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 2ea4bc91ac..e6f0e91069 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - void test1() { + void testCommunitySplit() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 3301577eea..ed33ff6b68 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -72,6 +72,7 @@ public class CreateRelatedEntitiesJob_phase1 { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); @@ -223,10 +224,10 @@ public class CreateRelatedEntitiesJob_phase1 { /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * file * - * @param spark - * @param relationPath + * @param spark the SparkSession + * @param relationPath the path storing the relation objects * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 631335376f..adc8aca9e5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1299,7 +1299,6 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature(DISALLOW_DOCTYPE_DECL, true); Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; From 52c18c2697b17e17916284b7b2f028ad28df7bcf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:16:55 +0200 Subject: [PATCH 090/162] removed not needed test class. Teh functionality has been moved --- .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java deleted file mode 100644 index 6cfc90736e..0000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.doiboost; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -public class GetCSVTest { - - @Test - public void readUnibiGoldTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); - - Assertions.assertEquals(72, pl.size()); - -// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); -// -// pl.forEach(res -> { -// try { -// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); -// } catch (JsonProcessingException e) { -// e.printStackTrace(); -// } -// }); - - } -} From b1c6140ebf01608af7792d4d528c83ea62760616 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:23:33 +0200 Subject: [PATCH 091/162] removed all comments in Italian --- .../hostedbymap/SparkApplyHostedByMapToDatasource.scala | 2 -- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 5 ----- .../hostedbymap/SparkPrepareHostedByInfoToApply.scala | 9 +++++---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index fad313f1cd..ae1454b479 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource { val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 4c3b98f3b4..533e439b7f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance -// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - object SparkApplyHostedByMapToResult { @@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult { val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance - // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index d342d57b07..6db0ad33db 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere la hostedbymap e trasformarla in entity info + //STEP1: read the hostedbymap and transform it in EntityInfo val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") - //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just + //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) + //to this entry we add the id of the datasource for the next step joinResHBM(resultInfoDataset, hostedByInfo) .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) From 822963283954830d16429411e983529de815b179 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:36:01 +0200 Subject: [PATCH 092/162] adding assertions to the mapping of the unibi part of gold list --- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index f886b275b4..98b811bb65 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -7,6 +7,8 @@ import java.net.URLConnection; import java.nio.charset.Charset; import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -29,18 +31,13 @@ public class TestReadCSV { .build() .parse(); - ObjectMapper mapper = new ObjectMapper(); - - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + Assertions.assertEquals(36, beans.size()); + Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); + Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } - + @Disabled @Test public void testCSVUrlUnibi() throws IOException { @@ -66,6 +63,7 @@ public class TestReadCSV { ); } + @Disabled @Test public void testCSVUrlDOAJ() throws IOException { From f9b6b45d85e3ea9e676062366a2e249de1f7bd03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:04:48 +0200 Subject: [PATCH 093/162] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bcf..93cc00eca4 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 55fc500d8d0d6736ca10e06c01e83c784299e93f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:17:48 +0200 Subject: [PATCH 094/162] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index b421f1342a..06325810b4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich From 7aa3260729a7a145842958526d3feedfa3461e8c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:18:45 +0200 Subject: [PATCH 095/162] reverting --- .../original/dc_cleaning_OPENAIREplus_compliant_hal_orig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index 505f562941..d4eb71dc42 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); From 524c06e0283244758df98add2d622db44dd7f16b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:20:30 +0200 Subject: [PATCH 096/162] reverting --- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index f80c91a6a2..3cfaec80b1 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + From 804589eb30f616314c9d9e01c0681312548e2777 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:23:35 +0200 Subject: [PATCH 098/162] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bcf..93cc00eca4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From b6b58bba281176ea534c75cee5a440b99ac4908c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:25:37 +0200 Subject: [PATCH 099/162] reverting --- .../eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 0c1fd9e64b..6a5fc3f871 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). ","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -issn,Array of String,No,,"Publication/Journal/issn +ISSN,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, From c6a2a780a9b181395ae455a661ff620ecbcb6f22 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:30:17 +0200 Subject: [PATCH 100/162] reverting --- .../src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 4db5fd463d..ba47aaf5c8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 0e1a6bec20edf4995f9ccb9825907f3a545f3817 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:32:29 +0200 Subject: [PATCH 101/162] reverting --- .../resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index b45fa1edba..95457fb701 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml @@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 From 8ad7c7141702b3a1638d7d1abe26889f300ea480 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:36:12 +0200 Subject: [PATCH 102/162] reverting --- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 06ebe97cd5..22da07e299 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 From b9663298330212499fef14413b35ced9d336dc57 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:37:00 +0200 Subject: [PATCH 103/162] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bcf..93cc00eca4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From cc3d72df0e9dd8781e0ae8ae59b57032bca4098e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:01 +0200 Subject: [PATCH 104/162] removing not needed dependency --- dhp-common/pom.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index a6a57ce7b1..c66c1d3bc4 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -118,11 +118,7 @@ dhp-schemas - - org.apache.commons - commons-csv - 1.8 - + From 95e5482bbb35acc1ae269fb018017ec3f8240583 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:26 +0200 Subject: [PATCH 105/162] removing not needed dependency --- dhp-workflows/dhp-doiboost/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index ea8832754d..385f87ad2f 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,12 +82,6 @@ org.apache.commons commons-text - - eu.dnetlib.dhp - dhp-aggregation - 1.2.4-SNAPSHOT - compile - From 785db1d5b2c26cf3e81bc9a38b54ec60c26764f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:44:07 +0200 Subject: [PATCH 106/162] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 98b811bb65..89259d8147 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -33,10 +33,15 @@ public class TestReadCSV { Assertions.assertEquals(36, beans.size()); Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions + .assertTrue( + beans + .stream() + .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } + @Disabled @Test public void testCSVUrlUnibi() throws IOException { From 9650eea497b3d131fab34f37afab12c872a26afc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:45:48 +0200 Subject: [PATCH 107/162] reverting --- dhp-common/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c66c1d3bc4..4c7810c47c 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,12 +117,6 @@ eu.dnetlib.dhp dhp-schemas - - - - - - From e33daaeee89b253cde8968a8c33cad4b346e9aae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:46:19 +0200 Subject: [PATCH 108/162] reverting --- dhp-workflows/dhp-doiboost/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 385f87ad2f..f496ea9a29 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -84,6 +84,7 @@ + From ac417ca798b07e77765d12ffe8c3c3774b1bc111 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:50:33 +0200 Subject: [PATCH 109/162] removed not needed test resource --- .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 ------------------- 1 file changed, 73 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv deleted file mode 100644 index ebde1bf189..0000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ /dev/null @@ -1,73 +0,0 @@ -"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" -"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" -"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" -"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" -"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" -"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" -"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" -"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" -"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" -"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" -"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" -"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" -"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" -"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" -"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" -"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" -"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" -"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" -"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" -"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" -"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" -"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" -"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" -"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" -"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" -"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" -"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" -"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" -"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" -"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" -"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" -"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" -"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" -"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" -"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" -"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" -"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" -"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" -"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" -"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" -"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" -"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ" -"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" -"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" -"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" -"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" -"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" -"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" -"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" -"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" -"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" -"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" -"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" -"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" -"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" -"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" -"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" -"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" -"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD" -"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" -"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" -"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" -"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" -"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" -"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" -"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" -"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" -"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" -"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" -"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" -"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" -"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" -"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" From 08dd2b2102c3a8074c854839cc4b18f948d24b95 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 18:09:41 +0200 Subject: [PATCH 110/162] moving the dependency version to the external pom file --- dhp-workflows/dhp-graph-mapper/pom.xml | 1 - pom.xml | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 665c01276a..6c5e4609a3 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -125,7 +125,6 @@ com.opencsv opencsv - 5.5 diff --git a/pom.xml b/pom.xml index 433c880930..d31ffb88d5 100644 --- a/pom.xml +++ b/pom.xml @@ -519,6 +519,12 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + From 8cdce59e0ebd826efd1b8845011ce3f2bf494e71 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:26 +0200 Subject: [PATCH 111/162] [graph raw] let the mapping exceptions propagate --- .../raw/AbstractMdRecordToOafMapper.java | 82 +++++++------------ .../raw/GenerateEntitiesApplication.java | 3 +- .../raw/GenerateEntitiesApplicationTest.java | 6 +- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 0 4 files changed, 36 insertions(+), 55 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 9aa4e4c313..526f45f6ef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -28,10 +28,7 @@ import java.util.Optional; import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.dom4j.Document; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; +import org.dom4j.*; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper { this.forceOriginalId = false; } - public List processMdRecord(final String xml) { - try { - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + public List processMdRecord(final String xml) throws DocumentException { - final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final Document doc = DocumentHelper + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - if (collectedFrom == null) { - return Lists.newArrayList(); - } + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - - if (hostedBy == null) { - return Lists.newArrayList(); - } - - final DataInfo info = prepareDataInfo(doc, invisible); - final long lastUpdateTimestamp = new Date().getTime(); - - final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); - - final String type = getResultType(doc, instances); - - return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - } catch (final Exception e) { - throw new RuntimeException(e); + if (collectedFrom == null) { + return Lists.newArrayList(); } + + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + + if (hostedBy == null) { + return Lists.newArrayList(); + } + + final DataInfo info = prepareDataInfo(doc, invisible); + final long lastUpdateTimestamp = new Date().getTime(); + + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + + final String type = getResultType(doc, instances); + + return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } protected String getResultType(final Document doc, final List instances) { @@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper { return vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final DataInfo info) { - final List res = new ArrayList<>(); - - for (final Object o : node.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId).trim(); - res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); - } - return res; - } - protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 9027b49d75..6bb18c3756 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -17,6 +17,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,7 +159,7 @@ public class GenerateEntitiesApplication { final String id, final String s, final boolean shouldHashId, - final VocabularyGroup vocs) { + final VocabularyGroup vocs) throws DocumentException { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 1e974dd692..67490a4706 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.List; import org.apache.commons.io.IOUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest { } @Test - void testMergeResult() throws IOException { + void testMergeResult() throws IOException, DocumentException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest { assertEquals(resultType, merge.getResulttype().getClassid()); } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { + protected Result getResult(String xmlFileName, Class clazz) + throws IOException, DocumentException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml new file mode 100644 index 0000000000..e69de29bb2 From 86d940044c067f71ac0bc451885ecd329f117cc6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:56 +0200 Subject: [PATCH 112/162] added test to verify bad records from FWF-E-Book-Library --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 43 ++++++++----- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 61 +++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 000dbfe25d..5228d1d02d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -13,6 +13,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -48,7 +49,7 @@ class MappersTest { } @Test - void testPublication() throws IOException { + void testPublication() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -145,7 +146,7 @@ class MappersTest { } @Test - void testPublication_PubMed() throws IOException { + void testPublication_PubMed() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); @@ -217,7 +218,7 @@ class MappersTest { } @Test - void testPublicationInvisible() throws IOException { + void testPublicationInvisible() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -233,7 +234,17 @@ class MappersTest { } @Test - void testDataset() throws IOException { + void testOdfFwfEBookLibrary() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_fwfebooklibrary.xml"))); + + assertThrows( + IllegalArgumentException.class, + () -> new OdfToOafMapper(vocs, false, true).processMdRecord(xml)); + } + + @Test + void testDataset() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -346,7 +357,7 @@ class MappersTest { } @Test - void testOdfBielefeld() throws IOException { + void testOdfBielefeld() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); @@ -394,7 +405,7 @@ class MappersTest { } @Test - void testOpentrial() throws IOException { + void testOpentrial() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); @@ -511,7 +522,7 @@ class MappersTest { } @Test - void testSoftware() throws IOException { + void testSoftware() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -530,7 +541,7 @@ class MappersTest { } @Test - void testClaimDedup() throws IOException { + void testClaimDedup() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -544,7 +555,7 @@ class MappersTest { } @Test - void testNakala() throws IOException { + void testNakala() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -572,7 +583,7 @@ class MappersTest { } @Test - void testEnermaps() throws IOException { + void testEnermaps() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -597,7 +608,7 @@ class MappersTest { } @Test - void testClaimFromCrossref() throws IOException { + void testClaimFromCrossref() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -614,7 +625,7 @@ class MappersTest { } @Test - void testODFRecord() throws IOException { + void testODFRecord() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -628,7 +639,7 @@ class MappersTest { } @Test - void testTextGrid() throws IOException { + void testTextGrid() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -662,7 +673,7 @@ class MappersTest { } @Test - void testBologna() throws IOException { + void testBologna() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -679,7 +690,7 @@ class MappersTest { } @Test - void testJairo() throws IOException { + void testJairo() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -703,7 +714,7 @@ class MappersTest { } @Test - void testOdfFromHdfs() throws IOException { + void testOdfFromHdfs() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml index e69de29bb2..cead018d17 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml @@ -0,0 +1,61 @@ + + +
+ oai:e-book.fwf.ac.at:o:116 + 2020-04-08T13:25:21.742Z + text + 2021-08-12T00:16:55.365Z +
+ + + + 978-3-205-77704-5 + https://e-book.fwf.ac.at/o:116 + + + Although small and not particularly peoples both Chechens and Palestinians became famous for suicide bomber attacks in recent years. This can - partly - be explained by the unrecognised collective traumas of the past. Both Chechens and Palestinians experienced collective traumas in the 1940ties. The entire Chechen population wad deported by Josef Stalin to Kasakhstan, Kirgysia and Sibiria in February 1944 under the pretext of collaboration with the Third Reich. Those who survived were allowed to return in 1957 to Chechenya. Half of the Palestinian Arab population was expelled from Palestine in 1947/48, when fighting erupted between Jews and Arabs. The refugees were never allowed to return. The memory of the deportation/expulsion was kept alive. The founding traumas contributed to the development of Chechen and Palestinian nationalism. Chechens and Palestinians till today suffer from their collective traumas, which stayed unrecognised and therefore create psychological and political problems for the following generations - and for their adverseries. The phenomenon of the "closed circle of violence" created a phobic collective behaviour, which led for example Chechens to the illusionary declaration of independence in 1991. It also led to the individual overreaction of young Chechens or Palestinians, who became living bombs. The collective Trauma, if untreated, poses a threat to any peaceful political solution. + 1. Einleitung Ausgehend von der Fragestellung, warum gerade bei Tschetschenen und Palästinensern der Selbstmordterrorismus in den letzten Jahren so populär geworden ist, analysiert die Autorin die Geschichte dieser beiden Völker. Einer der Gründe ist bisher wenig beachtet worden. Der Einfluss eines kollektiven Traumas, das als solches nicht anerkannt, behandelt und auch nicht einer politischen Lösung zugeführt wurde. 2. Geschichte der Palästinenser und Tschetschenen Im Zuge der Errichtung Israels im Unabhängigkeitskrieg 1948 verlor die Hälfte des palästinensischen Volkes - 750.000 Menschen - ihre Heimat. Unter der Führung von Jassir Arafat kämpften sie in den Jahrzehnten danach - mit Gewalt und am Verhandlungstisch - um einen eigenen Staat. Das Recht auf Rückkehr spielte dabei immer eine besondere Rolle. Die "Nakbah", die als Katastrophe empfundene Vertreibung 1948, wurde dabei Bezugs- und Angelpunkt mehrer Generationen von Flüchtlingen. Die Weigerung Israels, die Mitverantwortung für die Vertreibung der Palästinenser zu übernehmen und das kollektive Trauma der Palästinenser anzuerkennen - aus Angst vor einer Infragestellung des eigenen Staates - ist einer der Gründe, warum der Nahostkonflikt bisher nicht gelöst werden konnte. Auch die Tschetschenen durften jahrzehntelang über die Deportation ihres Volkes nicht einmal sprechen. Hatte Josef Stallin sie erst unter dem Vorwand der Kollaboration mit Nazi-Deutschland deportiert, waren sie zwar nach seinem Tod in die Heimat zurückgekehrt, lebten dort aber jahrzehntelang weiterhin als "unzuverlässiges Volk". Das kollektive Trauma der Deportation konnte nur mündlich überliefert werden. Mit dem Zusammenbruch der Sowjetunion brach der ungelöste Konflikt zwischen Tschetschenien und Russland sofort auf, das Land ging in blutigen Kriegen unter. 3. Zusammenfassung Die kollektive Erinnerung ist in den vergangenen Jahrzehnten zu einem zentralen Forschungsthema geworden. Der vorsichtige Einsatz von in der Individualpsychologie gewonnenen Erkenntnissen in der Behandlung von kollektiven Traumata, um zu einer politischen Lösung zu kommen, ist eine Chance. Das Studium historischer Fakten in Kombination mit den Erkenntnissen der Psychologie und Psychiatrie bietet die Basis für eine politische Lösung. Die vorliegende Arbeit zeigt, dass kollektive Traumata, die nicht behandelt werden, immer wieder, auch Generationen später, zu kollektiven Reaktionen führen können, die auf den ersten Blick irrational erscheinen. Die vielleicht radikalste Form des politischen Widerstandes, das Selbstmordattentat, ist dafür ein Beispiel. + deu/ger + Böhlau + application/pdf + + Trauma und Terror: Zum palästinensischen und tschetschenischen Nationalismus + + + + Szyszkowitz, Tessa + Tessa + Szyszkowitz + + + + Trauma, Terror, Palestinians, Suicide attacks, Recognition, Chechens + ÖFOS 2002, Contemporary history + BIC Standard Subject Categories, Postwar 20th century history, from c 1945 to c 2000 (HBLW3) + ÖFOS 2002, Zeitgeschichte + + + 14.02 MB + + + 2007 + + + literature + 2007-01-01 + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + deu/ger + fwf_________::D 3929 + + + https://fedora.e-book.fwf.ac.at/fedora/get/o:116/bdef:Content/download + +
\ No newline at end of file From 6e84b3951f685061cc61e71c124cf4a7518ccee2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:57:41 +0200 Subject: [PATCH 113/162] GetCSV refactoring - moving classes to dhp-common that have dependency with GetCSV class (that was located in graph-mapper) --- .../common/aggregation}/AggregatorReport.java | 2 +- .../collection/CollectorException.java | 2 +- .../dnetlib/dhp/common/collection/GetCSV.java | 65 +++++++++++++++++++ .../common}/collection/HttpClientParams.java | 2 +- .../common}/collection/HttpConnector2.java | 4 +- 5 files changed, 70 insertions(+), 5 deletions(-) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common => dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation}/AggregatorReport.java (96%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/CollectorException.java (93%) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpClientParams.java (97%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpConnector2.java (98%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java index 8e46ab92b8..c5926848e7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.aggregation.common; +package eu.dnetlib.dhp.common.aggregation; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java index 144d297e62..5d94c2f89a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; public class CollectorException extends Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java new file mode 100644 index 0000000000..44f4121ebe --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + + } + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass, char delimiter) throws IOException, ClassNotFoundException { + + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index ab0d5cc02a..6fcec00dd1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; /** * Bundles the http connection parameters driving the client behaviour. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 8493a3436a..724f5f0e1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; import static eu.dnetlib.dhp.utils.DHPUtils.*; @@ -15,7 +15,7 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java From bfe8f5335c733177b338b54f1e63936c62b362cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:14 +0200 Subject: [PATCH 114/162] GetCSV refactoring - copied model classes in test path --- .../collection/models/CSVProgramme.java | 80 +++++++++++++++++ .../common/collection/models/CSVProject.java | 90 +++++++++++++++++++ .../common/collection/models/DOAJModel.java | 52 +++++++++++ .../collection/models/UnibiGoldModel.java | 45 ++++++++++ 4 files changed, 267 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java new file mode 100644 index 0000000000..d17fcc75ef --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java @@ -0,0 +1,80 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; + +/** + * The model for the programme csv file + */ +public class CSVProgramme implements Serializable { + + @CsvBindByName(column = "code") + private String code; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "shortTitle") + private String shortTitle; + + @CsvBindByName(column = "language") + private String language; + + @CsvIgnore + private String classification; + + @CsvIgnore + private String classification_short; + + public String getClassification_short() { + return classification_short; + } + + public void setClassification_short(String classification_short) { + this.classification_short = classification_short; + } + + public String getClassification() { + return classification; + } + + public void setClassification(String classification) { + this.classification = classification; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getShortTitle() { + return shortTitle; + } + + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java new file mode 100644 index 0000000000..19606a09e7 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -0,0 +1,90 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "status") + private String status; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "call") + private String call; + + @CsvBindByName(column = "subjects") + private String subjects; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getCall() { + return call; + } + + public void setCall(String call) { + this.call = call; + } + + public String getSubjects() { + return subjects; + } + + public void setSubjects(String subjects) { + this.subjects = subjects; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java new file mode 100644 index 0000000000..156ba36320 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java new file mode 100644 index 0000000000..ae47262bf2 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} From 733bcaecf621657dfbaf1aec886b3071216dd924 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:52 +0200 Subject: [PATCH 115/162] GetCSV refactoring - added test class (all the tests are disabled since they refer to remote resource) --- .../dhp/common/collection/GetCSVTest.java | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java new file mode 100644 index 0000000000..bf5e3dedb2 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -0,0 +1,245 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.nio.file.Files; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.common.collection.models.CSVProgramme; +import eu.dnetlib.dhp.common.collection.models.CSVProject; +import eu.dnetlib.dhp.common.collection.models.DOAJModel; +import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; + +public class GetCSVTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + Assertions + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + Assertions + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + Assertions.assertTrue(csvp.getShortTitle().equals("")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + Assertions + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + Assertions.assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + Assertions + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + Assertions.assertTrue(csvp.getCode() != null); + Assertions.assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + // new BufferedReader(new FileReader(fileURL)) + , workingDir + "/projects", + "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + Assertions.assertTrue(csvp.getId().equals("771736")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + Assertions.assertTrue(csvp.getId().equals("752903")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + Assertions.assertTrue(csvp.getId().equals("861952")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + Assertions.assertTrue(csvp.getId() != null); + Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); + Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); + Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); + Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); + Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); + Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); + Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); + Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); + Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + + } + if (count == 7902) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); + Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + } + if (count == 16703) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions + .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + } + + count += 1; + } + + Assertions.assertEquals(16709, count); + } + +} From 7402daf51a34df27fa303c53143ed61ed5c48e3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:59:19 +0200 Subject: [PATCH 116/162] GetCSV refactoring - added dependency to open-csv lib --- dhp-common/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 4c7810c47c..c057123b1d 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,6 +117,11 @@ eu.dnetlib.dhp dhp-schemas + + + com.opencsv + opencsv + From d36e925277b71168a0dcb440a2f5e4d1624d5359 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:00:21 +0200 Subject: [PATCH 117/162] GetCSV refactoring - moved under model package --- .../dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java (97%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java index 5607df1184..fa2e3422e8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; From b62cd656a78b763c0db77dd9f625bb2cfab99937 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:01:10 +0200 Subject: [PATCH 118/162] GetCSV refactoring - changed the model to store only the information needed --- .../utils/{ => model}/CSVProgramme.java | 24 +++++---- .../project/utils/model/CSVProject.java | 51 +++++++++++++++++++ 2 files changed, 64 insertions(+), 11 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/CSVProgramme.java (77%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java similarity index 77% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index d486f01049..ea89a75eb4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -1,20 +1,31 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; +import com.opencsv.bean.CsvBindByName; + /** * The model for the programme csv file */ public class CSVProgramme implements Serializable { - private String rcn; + @CsvBindByName(column = "code") private String code; + @CsvBindByName(column = "title") private String title; + + @CsvBindByName(column = "shortTitle") private String shortTitle; + + @CsvBindByName(column = "language") private String language; + + @CsvBindByName(column = "classification") private String classification; + + @CsvBindByName(column = "classification_short") private String classification_short; public String getClassification_short() { @@ -33,14 +44,6 @@ public class CSVProgramme implements Serializable { this.classification = classification; } - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - public String getCode() { return code; } @@ -73,5 +76,4 @@ public class CSVProgramme implements Serializable { this.language = language; } -// } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java new file mode 100644 index 0000000000..73cea0539f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.actionmanager.project.utils.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + + +} From 4317211a2b5e64ad9e4a969642ab674182854593 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:14 +0200 Subject: [PATCH 119/162] GetCSV refactoring - refactoring due to movement --- .../dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 ++ .../eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5ce730692d..95c6f987a5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; + /** * Reads a generic excel file and maps it into classes that mirror its schema */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 359e46fc73..9e73cbc370 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -16,8 +16,8 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs From e9fc3ef3bc4f83ab6e0e93188a5782cefdea72d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:41 +0200 Subject: [PATCH 120/162] GetCSV refactoring - changed to use the new class to get and write the csv file --- .../actionmanager/project/utils/ReadCSV.java | 72 +++---------------- 1 file changed, 9 insertions(+), 63 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index 1ae775bec1..c967d4caee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -1,36 +1,21 @@ package eu.dnetlib.dhp.actionmanager.project.utils; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; +import java.io.*; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of a csv file and writes the Serialization of it in hdfs */ -public class ReadCSV implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final String csvFile; - private final char delimiter; +public class ReadCSV { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -50,57 +35,18 @@ public class ReadCSV implements Closeable { char del = ';'; if (delimiter.isPresent()) del = delimiter.get().charAt(0); - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { - log.info("Getting CSV file..."); - readCSV.execute(classForName); - } - - } - - public void execute(final String classForName) - throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { - CSVParser csvParser = new CSVParser(); - csvParser - .parse(csvFile, classForName, delimiter) - .stream() - .forEach(this::write); - } - - @Override - public void close() throws IOException { - writer.close(); - } - - public ReadCSV( - final String hdfsPath, - final String hdfsNameNode, - final String fileURL, - char delimiter) - throws Exception { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); - HttpConnector2 httpConnector = new HttpConnector2(); + FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); + BufferedReader reader = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))); - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del); - this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); - this.csvFile = httpConnector.getInputSource(fileURL); - this.delimiter = delimiter; - } + reader.close(); - protected void write(final Object p) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(p)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } } } From 7a789423aa37fa3f207f317b2f9181c4420c3fe5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:27 +0200 Subject: [PATCH 121/162] GetCSV refactoring - refactoring due to movement of classes --- .../eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java | 2 +- .../eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 40cf5ee532..686b0fc7f6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index b1a381415c..8efc76a0ec 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; From f0845e9865b5714c8e1f14ffed9eddfc4b3cafd6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:58 +0200 Subject: [PATCH 122/162] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/SparkAtomicActionJob.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index a4a0bf6a4a..cc1411b318 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; -import java.util.HashMap; import java.util.Objects; import java.util.Optional; @@ -22,9 +21,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; -import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; From 335a824e348c9e1eaf81d6c021f48323a7e599d6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:10:10 +0200 Subject: [PATCH 123/162] GetCSV refactoring - fixed issue --- .../eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 95c6f987a5..a520176f43 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -32,7 +32,7 @@ public class EXCELParser { XSSFSheet sheet = wb.getSheet(sheetName); - if (sheetName == null) { + if (sheet == null) { throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } From ab8abd61bbf29b6f6c1ae93b51eb63fc3f219ab8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:11:07 +0200 Subject: [PATCH 124/162] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index e4f2715fb3..bd864a6aae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -58,7 +58,7 @@ --hdfsNameNode${nameNode} --fileURL${projectFileURL} --hdfsPath${workingDir}/projects - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject @@ -70,7 +70,7 @@ --hdfsNameNode${nameNode} --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme @@ -83,7 +83,7 @@ --fileURL${topicFileURL} --hdfsPath${workingDir}/topic --sheetName${sheetName} - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic From 9da74b544adf252f84cec924598d88e71293b015 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:15 +0200 Subject: [PATCH 125/162] GetCSV refactoring - refactoring due to movement of classes --- .../dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 6 +++--- .../actionmanager/project/PrepareH2020ProgrammeTest.java | 2 +- .../dhp/actionmanager/project/PrepareProjectTest.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index cc36421a0a..d27a732ea3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -13,8 +13,8 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; @Disabled public class EXCELParserTest { @@ -25,7 +25,7 @@ public class EXCELParserTest { @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + workingDir = Files.createTempDirectory(EXCELParserTest.class.getSimpleName()); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index f128b56105..680872126f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; public class PrepareH2020ProgrammeTest { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index f0f3532aad..ee5f78b0c9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; public class PrepareProjectTest { From d57b2bb9273c33d3348a3abf282245eaec1e2ae2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:51 +0200 Subject: [PATCH 126/162] GetCSV refactoring - removing not needed dependency --- dhp-workflows/dhp-aggregation/pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 2942870087..98e22d8a35 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -84,14 +84,6 @@ json - - - org.apache.commons - commons-csv - 1.8 - - - org.apache.poi From 6b9e1bf2e3787bb5eb802508721a174981b7bffe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:17:50 +0200 Subject: [PATCH 127/162] GetCSV refactoring - removing not needed dependency --- .../java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 5491696736..dbf053a72c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -6,6 +6,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; + public abstract class ReportingJob { /** From 8769dd8eef6668bbce2e238175f2d36948c2b258 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:20:56 +0200 Subject: [PATCH 128/162] GetCSV refactoring - refactoring due to movement of classes --- .../java/eu/dnetlib/dhp/collection/CollectorWorker.java | 4 +++- .../dnetlib/dhp/collection/CollectorWorkerApplication.java | 4 +++- .../eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 4 ++-- .../collection/plugin/mongodb/MDStoreCollectorPlugin.java | 4 ++-- .../plugin/mongodb/MongoDbDumpCollectorPlugin.java | 4 ++-- .../dhp/collection/plugin/oai/OaiCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 6 +++--- .../dhp/collection/plugin/oai/OaiIteratorFactory.java | 6 +++--- .../dhp/collection/plugin/rest/RestCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java | 4 ++-- .../dnetlib/dhp/transformation/TransformSparkJobNode.java | 2 +- .../eu/dnetlib/dhp/collection/CollectionWorkflowTest.java | 1 + .../dhp/collection/plugin/rest/RestCollectorPluginTest.java | 6 +++--- .../dhp/collection/plugin/rest/RestIteratorTest.java | 2 +- 14 files changed, 32 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index d0872da1da..2ea3f35ccb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; @@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; public class CollectorWorker extends ReportingJob { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java index 545cbab0ca..708d2da65d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java @@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 457f634685..841d42fea8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public interface CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index 549c597204..ad28a72619 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -13,11 +13,11 @@ import org.slf4j.LoggerFactory; import com.mongodb.client.MongoCollection; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.common.MdstoreClient; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public class MDStoreCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index ec5bab448e..0364041410 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.utils.DHPUtils; public class MongoDbDumpCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 9918e4abe2..878e286e0a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -13,11 +13,11 @@ import com.google.common.base.Splitter; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; public class OaiCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 331dee6b47..3f767fd319 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -19,10 +19,10 @@ import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.collection.XmlCleaner; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIterator implements Iterator { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java index 48f6a94c86..1838223c23 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.util.Iterator; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.HttpClientParams; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIteratorFactory { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java index be2bbcece1..997948687b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java @@ -9,11 +9,11 @@ import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * TODO: delegate HTTP requests to the common HttpConnector2 implementation. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index a90d259b44..64a041fd4a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -30,9 +30,9 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.JsonUtils; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * log.info(...) equal to log.trace(...) in the application-logs diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index a01703675d..4fe79bf769 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -23,8 +23,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java index 05dd6b1d33..1a10b5f644 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java @@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index f2b873e109..f708c367b3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -12,10 +12,10 @@ import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * @author js, Andreas Czerniak diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java index 9f75bd468b..906f69dc9e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java @@ -9,7 +9,7 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * From ed183d878e5899aa998d3dd6b0de0ffa10353752 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 09:28:51 +0200 Subject: [PATCH 129/162] GetCSV refactoring - modified test classes due to change in the model of projects and programme --- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../project/PrepareProjectTest.java | 1 + .../project/SparkUpdateProjectTest.java | 2 +- .../project/preparedProgramme_whole.json | 277 ++++++++++++++++++ .../project/prepared_projects.json | 34 +-- .../project/projects_subset.json | 32 +- .../project/whole_programme.json.gz | Bin 31593 -> 29456 bytes 7 files changed, 313 insertions(+), 35 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 680872126f..68aacdc8b5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest { Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count()); - // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); + //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index ee5f78b0c9..d0c50a0549 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index a77dbace48..58365e0266 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -78,7 +78,7 @@ public class SparkUpdateProjectTest { "-programmePath", getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz") + "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json") .getPath(), "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json new file mode 100644 index 0000000000..e8e04e8dbb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json @@ -0,0 +1,277 @@ +{"code":"H2020-EU.5.g.","title":"Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","classification_short":"Science with and for Society | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts"} +{"code":"H2020-EU.3.4.2.1.","title":"A substantial reduction of traffic congestion","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | A substantial reduction of traffic congestion","classification_short":"Societal Challenges | Transport | Mobility, safety and security | A substantial reduction of traffic congestion"} +{"code":"H2020-EU.3.4.5.4.","title":"ITD Airframe","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Airframe","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Airframe"} +{"code":"H2020-EU.3.3.8.1.","title":"Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs"} +{"code":"H2020-EU.3.7.1.","title":"Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","classification_short":"Societal Challenges | Secure societies | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs"} +{"code":"H2020-EU.3.4.1.1.","title":"Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration"} +{"code":"H2020-EU.1.4.3.","title":"Reinforcing European research infrastructure policy and international cooperation","shortTitle":"Research infrastructure policy and international cooperation","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation"} +{"code":"H2020-EU.1.4.","title":"EXCELLENT SCIENCE - Research Infrastructures","shortTitle":"Research Infrastructures","language":"en","classification":"Excellent science | Research Infrastructures","classification_short":"Excellent Science | Research Infrastructures"} +{"code":"H2020-EU.3.4.6.1.","title":"Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies"} +{"code":"H2020-EU.3.4.5.5.","title":"ITD Engines","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Engines","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Engines"} +{"code":"H2020-EU.2.1.1.7.3.","title":"Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking"} +{"code":"H2020-EU.3.1.6.1.","title":"Promoting integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Promoting integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Promoting integrated care"} +{"code":"H2020-EU.3.7.6.","title":"Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","classification_short":"Societal Challenges | Secure societies | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management"} +{"code":"H2020-EU.3.4.2.3.","title":"Developing new concepts of freight transport and logistics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Developing new concepts of freight transport and logistics","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Developing new concepts of freight transport and logistics"} +{"code":"H2020-EU.3.3.2.1.","title":"Develop the full potential of wind energy","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy"} +{"code":"H2020-EU.3.2.5.","title":"Cross-cutting marine and maritime research","shortTitle":"Cross-cutting marine and maritime research","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research"} +{"code":"H2020-EU.3.4.7.","title":"SESAR JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU","classification_short":"Societal Challenges | Transport | SESAR JU"} +{"code":"H2020-EU.2.1.3.3.","title":"Management of materials components","shortTitle":"Management of materials components","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Management of materials components","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Management of materials components"} +{"code":"H2020-EU.3.3.3.","title":"Alternative fuels and mobile energy sources","shortTitle":"Alternative fuels and mobile energy sources","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources"} +{"code":"H2020-EU.7.","title":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","shortTitle":"European Institute of Innovation and Technology (EIT)","language":"en","classification":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","classification_short":"European Institute of Innovation and Technology (EIT)"} +{"code":"H2020-EU.3.5.4.1.","title":"Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake"} +{"code":"H2020-EU.3.1.4.","title":"Active ageing and self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health"} +{"code":"H2020-EU.1.","title":"Excellent science","shortTitle":"Excellent Science","language":"en","classification":"Excellent science","classification_short":"Excellent Science"} +{"code":"H2020-EU.3.5.6.1.","title":"Identifying resilience levels via observations, monitoring and modelling","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling"} +{"code":"H2020-EU.3.2.4.3.","title":"Supporting market development for bio-based products and processes","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Supporting market development for bio-based products and processes","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Supporting market development for bio-based products and processes"} +{"code":"H2020-EU.2.1.6.1.","title":"Enabling European competitiveness, non-dependence and innovation of the European space sector","shortTitle":"Competitiveness, non-dependence and innovation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation"} +{"code":"H2020-EU.4.b.","title":"Twinning of research institutions","shortTitle":"Twinning of research institutions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Twinning of research institutions","classification_short":"Spreading excellence and widening participation | Twinning of research institutions"} +{"code":"H2020-EU.3.1.7.6.","title":"Psychiatric diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases"} +{"code":"H2020-EU.3.1.2.2.","title":"Improving diagnosis and prognosis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Improving diagnosis and prognosis","classification_short":"Societal Challenges | Health | Preventing disease | Improving diagnosis and prognosis"} +{"code":"H2020-EU.3.4.5.3.","title":"IADP Fast Rotorcraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Fast Rotorcraft"} +{"code":"H2020-EU.3.1.3.1.","title":"Treating disease, including developing regenerative medicine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine","classification_short":"Societal Challenges | Health | Treating and managing disease | Treating disease, including developing regenerative medicine"} +{"code":"H2020-EU.3.4.3.3.","title":"Advanced production processes","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Advanced production processes","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Advanced production processes"} +{"code":"H2020-EU.3.1.7.","title":"Innovative Medicines Initiative 2 (IMI2)","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2)","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2)"} +{"code":"H2020-EU.3.6.3.2.","title":"Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity"} +{"code":"H2020-EU.3.5.1.2.","title":"Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures"} +{"code":"H2020-EU.3.6.1.","title":"Inclusive societies","shortTitle":"Inclusive societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies"} +{"code":"H2020-EU.3.2.","title":"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","shortTitle":"Food, agriculture, forestry, marine research and bioeconomy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy"} +{"code":"H2020-EU.2.1.6.1.2.","title":"Boost innovation between space and non-space sectors","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Boost innovation between space and non-space sectors","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Boost innovation between space and non-space sectors"} +{"code":"H2020-EU.2.1.3.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced materials","shortTitle":"Advanced materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials"} +{"code":"H2020-EU.2.1.2.3.","title":"Developing the societal dimension of nanotechnology","shortTitle":"Societal dimension of nanotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing the societal dimension of nanotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Societal dimension of nanotechnology"} +{"code":"H2020-EU.4.","title":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","shortTitle":"Spreading excellence and widening participation","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","classification_short":"Spreading excellence and widening participation"} +{"code":"H2020-EU.3.6.1.2.","title":"Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change"} +{"code":"H2020-EU.3.4.2.","title":"Better mobility, less congestion, more safety and security","shortTitle":"Mobility, safety and security","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security","classification_short":"Societal Challenges | Transport | Mobility, safety and security"} +{"code":"H2020-EU.3.1.7.13.","title":"Other","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Other","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Other"} +{"code":"H2020-EU.3.3.3.3.","title":"New alternative fuels","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | New alternative fuels","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | New alternative fuels"} +{"code":"H2020-EU.2.1.3.5.","title":"Materials for creative industries, including heritage","shortTitle":"Materials for creative industries, including heritage","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for creative industries, including heritage","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for creative industries, including heritage"} +{"code":"H2020-EU.3.3.3.2.","title":"Reducing time to market for hydrogen and fuel cells technologies","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies"} +{"code":"H2020-EU.5.d.","title":"Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","classification_short":"Science with and for Society | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels"} +{"code":"H2020-EU.3.1.","title":"SOCIETAL CHALLENGES - Health, demographic change and well-being","shortTitle":"Health","language":"en","classification":"Societal challenges | Health, demographic change and well-being","classification_short":"Societal Challenges | Health"} +{"code":"H2020-EU.3.5.3.1.","title":"Improve the knowledge base on the availability of raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials"} +{"code":"H2020-EU.3.2.1.4.","title":"Sustainable forestry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Sustainable forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Sustainable forestry"} +{"code":"H2020-EU.3.3.","title":"SOCIETAL CHALLENGES - Secure, clean and efficient energy","shortTitle":"Energy","language":"en","classification":"Societal challenges | Secure, clean and efficient energy","classification_short":"Societal Challenges | Energy"} +{"code":"H2020-EU.3.4.8.1.","title":"Innovation Programme 1 (IP1): Cost-efficient and reliable trains","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains"} +{"code":"H2020-EU.2.3.2.1.","title":"Support for research intensive SMEs","shortTitle":"Support for research intensive SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Support for research intensive SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Support for research intensive SMEs"} +{"code":"H2020-EU.2.1.3.2.","title":"Materials development and transformation","shortTitle":"Materials development and transformation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials development and transformation","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials development and transformation"} +{"code":"H2020-EU.1.4.1.3.","title":"Development, deployment and operation of ICT-based e-infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures"} +{"code":"H2020-EU.3.5.4.2.","title":"Support innovative policies and societal changes","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Support innovative policies and societal changes","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Support innovative policies and societal changes"} +{"code":"H2020-EU.2.1.3.6.","title":"Metrology, characterisation, standardisation and quality control","shortTitle":"Metrology, characterisation, standardisation and quality control","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Metrology, characterisation, standardisation and quality control","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Metrology, characterisation, standardisation and quality control"} +{"code":"H2020-EU.3.4.5.8.","title":"ECO Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ECO Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ECO Transverse Area"} +{"code":"H2020-EU.5.f.","title":"Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","classification_short":"Science with and for Society | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation"} +{"code":"H2020-EU.5.h.","title":"Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","classification_short":"Science with and for Society | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public"} +{"code":"H2020-EU.2.1.1.7.1.","title":"Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration"} +{"code":"H2020-EU.3.7.5.","title":"Increase Europe's resilience to crises and disasters","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Increase Europe's resilience to crises and disasters","classification_short":"Societal Challenges | Secure societies | Increase Europe's resilience to crises and disasters"} +{"code":"H2020-EU.1.4.2.2.","title":"Strengthening the human capital of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Strengthening the human capital of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Strengthening the human capital of research infrastructures"} +{"code":"H2020-EU.3.4.1.2.","title":"Developing smart equipment, infrastructures and services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services"} +{"code":"H2020-EU.2.3.2.2.","title":"Enhancing the innovation capacity of SMEs","shortTitle":"Enhancing the innovation capacity of SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Enhancing the innovation capacity of SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Enhancing the innovation capacity of SMEs"} +{"code":"H2020-EU.1.3.5.","title":"Specific support and policy actions","shortTitle":"MSCA Specific support","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Specific support and policy actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Specific support"} +{"code":"H2020-EU.3.2.3.3.","title":"Boosting marine and maritime innovation through biotechnology","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology"} +{"code":"H2020-EU.3.2.1.2.","title":"Providing ecosystems services and public goods","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods"} +{"code":"H2020-EU.2.3.2.3.","title":"Supporting market-driven innovation","shortTitle":"Supporting market-driven innovation","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Supporting market-driven innovation","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Supporting market-driven innovation"} +{"code":"H2020-EU.5.a.","title":"Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","classification_short":"Science with and for Society | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations"} +{"code":"H2020-EU.3.1.7.9.","title":"Ageing-associated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases"} +{"code":"H2020-EU.2.2.1.","title":"The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","shortTitle":"Debt facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Debt facility"} +{"code":"H2020-Euratom-1.8.","title":"Ensure availability and use of research infrastructures of pan_european relevance","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance","classification_short":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance"} +{"code":"H2020-EU.3.2.2.1.","title":"Informed consumer choices","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices"} +{"code":"H2020-EU.3.7.","title":"Secure societies - Protecting freedom and security of Europe and its citizens","shortTitle":"Secure societies","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens","classification_short":"Societal Challenges | Secure societies"} +{"code":"H2020-EU.1.3.4.","title":"Increasing structural impact by co-funding activities","shortTitle":"MSCA Co-funding","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Increasing structural impact by co-funding activities","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Co-funding"} +{"code":"H2020-EU.2.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies","shortTitle":"Leadership in enabling and industrial technologies (LEIT)","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT)"} +{"code":"H2020-EU.2.1.3.4.","title":"Materials for a sustainable, resource-efficient and low-emission industry","shortTitle":"Materials for a resource-efficient and low-emission industry","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for a sustainable, resource-efficient and low-emission industry","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for a resource-efficient and low-emission industry"} +{"code":"H2020-EU.3.4.5.7.","title":"Small Air Transport (SAT) Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area"} +{"code":"H2020-EU.3.4.8.3.","title":"Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure"} +{"code":"H2020-Euratom-1.1.","title":"Support safe operation of nuclear systems","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support safe operation of nuclear systems","classification_short":"Euratom | Indirect actions | Support safe operation of nuclear systems"} +{"code":"H2020-EU.2.3.1.","title":" Mainstreaming SME support, especially through a dedicated instrument","shortTitle":"Mainstreaming SME support","language":"en","classification":"Industrial leadership | Innovation In SMEs | Mainstreaming SME support, especially through a dedicated instrument","classification_short":"Industrial Leadership | Innovation in SMEs | Mainstreaming SME support"} +{"code":"H2020-EU.1.4.3.1.","title":"Reinforcing European policy for research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures"} +{"code":"H2020-Euratom-1.3.","title":"Support the development and sustainability of nuclear competences at Union level","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level","classification_short":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level"} +{"code":"H2020-EU.3.1.7.1.","title":"Antimicrobial resistance","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance"} +{"code":"H2020-EU.3.7.4.","title":"Improve cyber security","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Improve cyber security","classification_short":"Societal Challenges | Secure societies | Improve cyber security"} +{"code":"H2020-EU.2.1.1.7.2.","title":"Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services"} +{"code":"H2020-EU.3.5.4.","title":"Enabling the transition towards a green economy and society through eco-innovation","shortTitle":"A green economy and society through eco-innovation","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation"} +{"code":"H2020-EU.3.5.3.2.","title":"Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery"} +{"code":"H2020-EU.3.4.5.10.","title":"Thematic Topics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Thematic Topics","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Thematic Topics"} +{"code":"H2020-EU.3.1.5.1.","title":"Improving halth information and better use of health data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving halth information and better use of health data","classification_short":"Societal Challenges | Health | Methods and data | Improving halth information and better use of health data"} +{"code":"H2020-EU.3.3.3.1.","title":"Make bio-energy more competitive and sustainable","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable"} +{"code":"H2020-EU.3.6.2.1.","title":"Strengthen the evidence base and support for the Innovation Union and ERA","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA"} +{"code":"H2020-EU.3.1.7.12.","title":"Vaccine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Vaccine","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Vaccine"} +{"code":"H2020-EU.3.5.4.3.","title":"Measure and assess progress towards a green economy","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Measure and assess progress towards a green economy","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Measure and assess progress towards a green economy"} +{"code":"H2020-EU.3.4.8.5.","title":"Innovation Programme 5: Technologies for sustainable and attractive European rail freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight"} +{"code":"H2020-EU.3.5.4.4.","title":"Foster resource efficiency through digital systems","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Foster resource efficiency through digital systems","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Foster resource efficiency through digital systems"} +{"code":"H2020-EU.3.3.8.3.","title":"Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources"} +{"code":"H2020-Euratom","title":"Euratom","shortTitle":"","language":"en","classification":"Euratom","classification_short":"Euratom"} +{"code":"H2020-EU.3.5.6.2.","title":"Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards"} +{"code":"H2020-EU.3.2.5.2.","title":"Develop the potential of marine resources through an integrated approach","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach"} +{"code":"H2020-EU.2.1.1.5.","title":"Advanced interfaces and robots: Robotics and smart spaces","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Advanced interfaces and robots: Robotics and smart spaces","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Advanced interfaces and robots: Robotics and smart spaces"} +{"code":"H2020-EU.3.3.5.","title":"New knowledge and technologies","shortTitle":"New knowledge and technologies","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | New knowledge and technologies","classification_short":"Societal Challenges | Energy | New knowledge and technologies"} +{"code":"H2020-EU.1.2.2.","title":"FET Proactive","shortTitle":"FET Proactive","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Proactive","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Proactive"} +{"code":"H2020-EU.3.6.1.3.","title":"Europe's role as a global actor, notably regarding human rights and global justice","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice"} +{"code":"H2020-EU.2.1.4.1.","title":"Boosting cutting-edge biotechnologies as a future innovation driver","shortTitle":"Cutting-edge biotechnologies as future innovation driver","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Boosting cutting-edge biotechnologies as a future innovation driver","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Cutting-edge biotechnologies as future innovation driver"} +{"code":"H2020-EU.3.1.3.","title":"Treating and managing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease","classification_short":"Societal Challenges | Health | Treating and managing disease"} +{"code":"H2020-EU.3.3.4.","title":"A single, smart European electricity grid","shortTitle":"A single, smart European electricity grid","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | A single, smart European electricity grid","classification_short":"Societal Challenges | Energy | A single, smart European electricity grid"} +{"code":"H2020-EU.3.2.6.","title":"Bio-based Industries Joint Technology Initiative (BBI-JTI)","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)"} +{"code":"H2020-EU.1.3.2.","title":"Nurturing excellence by means of cross-border and cross-sector mobility","shortTitle":"MSCA Mobility","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Nurturing excellence by means of cross-border and cross-sector mobility","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Mobility"} +{"code":"H2020-EU.2.1.3.7.","title":"Optimisation of the use of materials","shortTitle":"Optimisation of the use of materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Optimisation of the use of materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Optimisation of the use of materials"} +{"code":"H2020-EU.2.1.2.4.","title":"Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","shortTitle":"Synthesis and manufacturing of nanomaterials, components and systems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Synthesis and manufacturing of nanomaterials, components and systems"} +{"code":"H2020-EU.1.4.1.","title":"Developing the European research infrastructures for 2020 and beyond","shortTitle":"Research infrastructures for 2020 and beyond","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond"} +{"code":"H2020-EU.3.1.1.1.","title":"Understanding the determinants of health, improving health promotion and disease prevention","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention"} +{"code":"H2020-EU.5.c.","title":"Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","classification_short":"Science with and for Society | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology"} +{"code":"H2020-EU.5.","title":"SCIENCE WITH AND FOR SOCIETY","shortTitle":"Science with and for Society","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY","classification_short":"Science with and for Society"} +{"code":"H2020-EU.3.5.3.3.","title":"Find alternatives for critical raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials"} +{"code":"H2020-EU.3.2.3.1.","title":"Developing sustainable and environmentally-friendly fisheries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries"} +{"code":"H2020-EU.2.1.2.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Nanotechnologies","shortTitle":"Nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies"} +{"code":"H2020-EU.3.4.3.2.","title":"On board, smart control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | On board, smart control systems","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | On board, smart control systems"} +{"code":"H2020-EU.3.2.4.1.","title":"Fostering the bio-economy for bio-based industries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Fostering the bio-economy for bio-based industries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Fostering the bio-economy for bio-based industries"} +{"code":"H2020-EU.3.1.6.2.","title":"Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches"} +{"code":"H2020-EU.2.1.5.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced manufacturing and processing","shortTitle":"Advanced manufacturing and processing","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing"} +{"code":"H2020-EU.3.5.2.2.","title":"Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services"} +{"code":"H2020-EU.3.1.7.3.","title":"Cardiovascular diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases"} +{"code":"H2020-EU.3.3.8.2.","title":"Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market"} +{"code":"H2020-EU.2.1.6.3.","title":"Enabling exploitation of space data","shortTitle":"Enabling exploitation of space data","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling exploitation of space data","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling exploitation of space data"} +{"code":"H2020-EU.2.1.2.5.","title":"Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","shortTitle":"Capacity-enhancing techniques, measuring methods and equipment","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Capacity-enhancing techniques, measuring methods and equipment"} +{"code":"H2020-EU.3.6.2.","title":"Innovative societies","shortTitle":"Innovative societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies"} +{"code":"H2020-EU.3.1.2.1.","title":"Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","classification_short":"Societal Challenges | Health | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility"} +{"code":"H2020-EU.3.6.1.4.","title":"The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design"} +{"code":"H2020-EU.3.3.2.4.","title":"Develop geothermal, hydro, marine and other renewable energy options","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options"} +{"code":"H2020-EU.5.b.","title":"Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","classification_short":"Science with and for Society | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities"} +{"code":"H2020-EU.1.3.3.","title":"Stimulating innovation by means of cross-fertilisation of knowledge","shortTitle":"MSCA Knowledge","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Stimulating innovation by means of cross-fertilisation of knowledge","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Knowledge"} +{"code":"H2020-EU.3.1.4.2.","title":"Individual awareness and empowerment for self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health"} +{"code":"H2020-EU.3.1.7.8.","title":"Immune-mediated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases"} +{"code":"H2020-EU.3.4.","title":"SOCIETAL CHALLENGES - Smart, Green And Integrated Transport","shortTitle":"Transport","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport","classification_short":"Societal Challenges | Transport"} +{"code":"H2020-EU.3.2.6.1.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy"} +{"code":"H2020-EU.2.1.2.1.","title":"Developing next generation nanomaterials, nanodevices and nanosystems ","shortTitle":"Next generation nanomaterials, nanodevices and nanosystems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing next generation nanomaterials, nanodevices and nanosystems ","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Next generation nanomaterials, nanodevices and nanosystems"} +{"code":"H2020-Euratom-1.5.","title":"Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","classification_short":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities"} +{"code":"H2020-EU.3.5.","title":"SOCIETAL CHALLENGES - Climate action, Environment, Resource Efficiency and Raw Materials","shortTitle":"Climate and environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials","classification_short":"Societal Challenges | Climate and environment"} +{"code":"H2020-EU.2.1.1.6.","title":"Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies"} +{"code":"H2020-EU.3.4.2.4.","title":"Reducing accident rates, fatalities and casualties and improving security","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Reducing accident rates, fatalities and casualties and improving security","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Reducing accident rates, fatalities and casualties and improving security"} +{"code":"H2020-EU.3.6.2.2.","title":"Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail"} +{"code":"H2020-EU.3.5.1.1.","title":"Improve the understanding of climate change and the provision of reliable climate projections","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections"} +{"code":"H2020-EU.3.4.3.4.","title":"Exploring entirely new transport concepts","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts"} +{"code":"H2020-EU.3.5.2.1.","title":"Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being"} +{"code":"H2020-EU.3.2.2.3.","title":"A sustainable and competitive agri-food industry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry"} +{"code":"H2020-EU.1.4.1.1.","title":"Developing new world-class research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Developing new world-class research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Developing new world-class research infrastructures"} +{"code":"H2020-EU.3.1.2.3.","title":"Developing better preventive and therapeutic vaccines","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing better preventive and therapeutic vaccines","classification_short":"Societal Challenges | Health | Preventing disease | Developing better preventive and therapeutic vaccines"} +{"code":"H2020-EU.1.4.3.2.","title":"Facilitate strategic international cooperation","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Facilitate strategic international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Facilitate strategic international cooperation"} +{"code":"H2020-EU.3.5.2.","title":"Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","shortTitle":"Protection of the environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","classification_short":"Societal Challenges | Climate and environment | Protection of the environment"} +{"code":"H2020-Euratom-1.9.","title":"European Fusion Development Agreement","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | European Fusion Development Agreement","classification_short":"Euratom | Indirect actions | European Fusion Development Agreement"} +{"code":"H2020-EU.3.2.1.1.","title":"Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience"} +{"code":"H2020-EU.3.2.2.2.","title":"Healthy and safe foods and diets for all","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all"} +{"code":"H2020-EU.2.1.4.2.","title":"Bio-technology based industrial products and processes","shortTitle":"Bio-technology based industrial products and processes","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Bio-technology based industrial products and processes","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Bio-technology based industrial products and processes"} +{"code":"H2020-EU.3.4.5.1.","title":"IADP Large Passenger Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Large Passenger Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Large Passenger Aircraft"} +{"code":"H2020-EU.3.1.1.3.","title":"Improving surveillance and preparedness","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Improving surveillance and preparedness","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Improving surveillance and preparedness"} +{"code":"H2020-EU.2.1.6.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Space","shortTitle":"Space","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space"} +{"code":"H2020-EU.3.1.5.2.","title":"Improving scientific tools and methods to support policy making and regulatory needs","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs","classification_short":"Societal Challenges | Health | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs"} +{"code":"H2020-EU.3.","title":"Societal challenges","shortTitle":"Societal Challenges","language":"en","classification":"Societal challenges","classification_short":"Societal Challenges"} +{"code":"H2020-EU.1.3.","title":"EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions","shortTitle":"Marie-Sklodowska-Curie Actions","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions"} +{"code":"H2020-EU.4.f.","title":"Strengthening the administrative and operational capacity of transnational networks of National Contact Points","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Strengthening the administrative and operational capacity of transnational networks of National Contact Points","classification_short":"Spreading excellence and widening participation | Strengthening the administrative and operational capacity of transnational networks of National Contact Points"} +{"code":"H2020-EU.1.2.","title":"EXCELLENT SCIENCE - Future and Emerging Technologies (FET)","shortTitle":"Future and Emerging Technologies (FET)","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET)","classification_short":"Excellent Science | Future and Emerging Technologies (FET)"} +{"code":"H2020-EU.3.3.1.1.","title":"Bring to mass market technologies and services for a smart and efficient energy use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Bring to mass market technologies and services for a smart and efficient energy use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Bring to mass market technologies and services for a smart and efficient energy use"} +{"code":"H2020-EU.3.3.2.2.","title":"Develop efficient, reliable and cost-competitive solar energy systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems"} +{"code":"H2020-EU.4.c.","title":"Establishing ‚ERA Chairs’","shortTitle":"ERA chairs","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Establishing ‚ERA Chairs’","classification_short":"Spreading excellence and widening participation | ERA chairs"} +{"code":"H2020-EU.3.4.5","title":"CLEANSKY2","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2","classification_short":"Societal Challenges | Transport | CLEANSKY2"} +{"code":"H2020-EU.3.4.5.2.","title":"IADP Regional Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Regional Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Regional Aircraft"} +{"code":"H2020-EU.3.5.1.","title":"Fighting and adapting to climate change","shortTitle":"Fighting and adapting to climate change","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change"} +{"code":"H2020-EU.3.3.1.","title":"Reducing energy consumption and carbon foorpint by smart and sustainable use","shortTitle":"Reducing energy consumption and carbon footprint","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint"} +{"code":"H2020-EU.3.4.1.","title":"Resource efficient transport that respects the environment","shortTitle":"Resource efficient transport that respects the environment","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment"} +{"code":"H2020-EU.3.2.6.2.","title":"Fostering the bio-economy for bio-based industrie","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie"} +{"code":"H2020-EU.3.4.7.1","title":"Exploratory Research","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU | Exploratory Research","classification_short":"Societal Challenges | Transport | SESAR JU | Exploratory Research"} +{"code":"H2020-EU.1.2.1.","title":"FET Open","shortTitle":"FET Open","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Open","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Open"} +{"code":"H2020-EU.3.4.3.1.","title":"Developing the next generation of transport means as the way to secure market share in the future","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future"} +{"code":"H2020-EU.3.2.4.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","shortTitle":"Bio-based industries and supporting bio-economy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy"} +{"code":"H2020-EC","title":"Horizon 2020 Framework Programme","shortTitle":"EC Treaty","language":"en","classification":"Horizon 2020 Framework Programme","classification_short":"EC Treaty"} +{"code":"H2020-EU.3.6.2.4.","title":"Promote coherent and effective cooperation with third countries","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Promote coherent and effective cooperation with third countries","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Promote coherent and effective cooperation with third countries"} +{"code":"H2020-EU.3.1.7.5.","title":"Neurodegenerative diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases"} +{"code":"H2020-EU.2.1.6.4.","title":"Enabling European research in support of international space partnerships","shortTitle":"Research in support of international space partnerships","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European research in support of international space partnerships","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Research in support of international space partnerships"} +{"code":"H2020-EU.2.1.5.1.","title":"Technologies for Factories of the Future","shortTitle":"Technologies for Factories of the Future","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies for Factories of the Future","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies for Factories of the Future"} +{"code":"H2020-EU.2.3.2.","title":"Specific support","shortTitle":"","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support"} +{"code":"H2020-EU.1.4.2.","title":"Fostering the innovation potential of research infrastructures and their human resources","shortTitle":"Research infrastructures and their human resources","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources"} +{"code":"H2020-EU.3.3.1.2.","title":"Unlock the potential of efficient and renewable heating-cooling systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Unlock the potential of efficient and renewable heating-cooling systems","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Unlock the potential of efficient and renewable heating-cooling systems"} +{"code":"H2020-EU.3.2.3.2.","title":"Developing competitive and environmentally-friendly European aquaculture","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture"} +{"code":"H2020-EU.3.2.1.3.","title":"Empowerment of rural areas, support to policies and rural innovation","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation"} +{"code":"H2020-EU.3.2.5.3.","title":"Cross-cutting concepts and technologies enabling maritime growth","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth"} +{"code":"H2020-EU.2.1.3.1.","title":"Cross-cutting and enabling materials technologies","shortTitle":"Cross-cutting and enabling materials technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Cross-cutting and enabling materials technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Cross-cutting and enabling materials technologies"} +{"code":"H2020-EU.3.1.1.2.","title":"Understanding disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding disease"} +{"code":"H2020-Euratom-1.6.","title":"Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","classification_short":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design"} +{"code":"H2020-EU.3.5.7.1.","title":"Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements"} +{"code":"H2020-EU.2.2.","title":"INDUSTRIAL LEADERSHIP - Access to risk finance","shortTitle":"Access to risk finance","language":"en","classification":"Industrial leadership | Access to risk finance","classification_short":"Industrial Leadership | Access to risk finance"} +{"code":"H2020-EU.3.4.6.","title":"FCH2 (transport objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives)","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives)"} +{"code":"H2020-EU.4.d.","title":"A Policy Support Facility","shortTitle":"Policy Support Facility (PSF)","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | A Policy Support Facility","classification_short":"Spreading excellence and widening participation | Policy Support Facility (PSF)"} +{"code":"H2020-EU.2.1.1.7.","title":"ECSEL","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL"} +{"code":"H2020-EU.3.1.5.","title":"Methods and data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data","classification_short":"Societal Challenges | Health | Methods and data"} +{"code":"H2020-EU.3.7.7.","title":"Enhance stadardisation and interoperability of systems, including for emergency purposes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Enhance stadardisation and interoperability of systems, including for emergency purposes","classification_short":"Societal Challenges | Secure societies | Enhance stadardisation and interoperability of systems, including for emergency purposes"} +{"code":"H2020-Euratom-1.7.","title":"Promote innovation and industry competitiveness","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Promote innovation and industry competitiveness","classification_short":"Euratom | Indirect actions | Promote innovation and industry competitiveness"} +{"code":"H2020-EU.2.1.5.3.","title":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","shortTitle":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries"} +{"code":"H2020-EU.2.1.4.3.","title":"Innovative and competitive platform technologies","shortTitle":"Innovative and competitive platform technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Innovative and competitive platform technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Innovative and competitive platform technologies"} +{"code":"H2020-EU.1.2.3.","title":"FET Flagships","shortTitle":"FET Flagships","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Flagships","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Flagships"} +{"code":"H2020-EU.3.6.3.","title":"Reflective societies - cultural heritage and European identity","shortTitle":"Reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies"} +{"code":"H2020-EU.3.6.3.3.","title":"Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures"} +{"code":"H2020-EU.3.2.4.2.","title":"Developing integrated biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Developing integrated biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Developing integrated biorefineries"} +{"code":"H2020-EU.2.1.6.1.1.","title":"Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems"} +{"code":"H2020-EU.3.1.3.2.","title":"Transferring knowledge to clinical practice and scalable innovation actions","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions","classification_short":"Societal Challenges | Health | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions"} +{"code":"H2020-EU.2.","title":"Industrial leadership","shortTitle":"Industrial Leadership","language":"en","classification":"Industrial leadership","classification_short":"Industrial Leadership"} +{"code":"H2020-EU.3.4.1.3.","title":"Improving transport and mobility in urban areas","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas"} +{"code":"H2020-EU.4.e.","title":"Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","classification_short":"Spreading excellence and widening participation | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks"} +{"code":"H2020-EU.3.2.1.","title":"Sustainable agriculture and forestry","shortTitle":"Sustainable agriculture and forestry","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry"} +{"code":"H2020-EU.3.1.7.7.","title":"Respiratory diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases"} +{"code":"H2020-EU.3.4.8.6.","title":"Cross-cutting themes and activities (CCA)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)"} +{"code":"H2020-EU.3.4.8.4.","title":"Innovation Programme 4: IT Solutions for attractive railway services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services"} +{"code":"H2020-EU.3.2.2.","title":"Sustainable and competitive agri-food sector for a safe and healthy diet","shortTitle":"Sustainable and competitive agri-food sector for a safe and healthy diet","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet"} +{"code":"H2020-EU.3.4.3.","title":"Global leadership for the European transport industry","shortTitle":"Global leadership for the European transport industry","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry"} +{"code":"H2020-EU.1.4.2.1.","title":"Exploiting the innovation potential of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures"} +{"code":"H2020-EU.3.3.2.3.","title":"Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use"} +{"code":"H2020-EU.3.6.3.1.","title":"Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past"} +{"code":"H2020-EU.2.1.2.2.","title":"Ensuring the safe and sustainable development and application of nanotechnologies","shortTitle":"Safe and sustainable nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Ensuring the safe and sustainable development and application of nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Safe and sustainable nanotechnologies"} +{"code":"H2020-EU.3.1.6.","title":"Health care provision and integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care"} +{"code":"H2020-EU.3.4.5.9.","title":"Technology Evaluator","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Technology Evaluator","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Technology Evaluator"} +{"code":"H2020-EU.3.6.","title":"SOCIETAL CHALLENGES - Europe In A Changing World - Inclusive, Innovative And Reflective Societies","shortTitle":"Inclusive, innovative and reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies"} +{"code":"H2020-EU.3.4.8.","title":"Shift2Rail JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU","classification_short":"Societal Challenges | Transport | Shift2Rail JU"} +{"code":"H2020-EU.3.2.6.3.","title":"Sustainable biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries"} +{"code":"H2020-EU.4.a.","title":"Teaming of excellent research institutions and low performing RDI regions","shortTitle":"Teaming of research institutions and low performing regions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Teaming of excellent research institutions and low performing RDI regions","classification_short":"Spreading excellence and widening participation | Teaming of research institutions and low performing regions"} +{"code":"H2020-EU.3.1.7.4.","title":"Diabetes","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Diabetes","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Diabetes"} +{"code":"H2020-EU.3.7.2.","title":"Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","classification_short":"Societal Challenges | Secure societies | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes"} +{"code":"H2020-EU.3.1.2.","title":"Preventing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease","classification_short":"Societal Challenges | Health | Preventing disease"} +{"code":"H2020-EU.3.5.3.4.","title":"Improve societal awareness and skills on raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials"} +{"code":"H2020-EU.3.3.7.","title":"Market uptake of energy innovation - building on Intelligent Energy Europe","shortTitle":"Market uptake of energy innovation","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Market uptake of energy innovation - building on Intelligent Energy Europe","classification_short":"Societal Challenges | Energy | Market uptake of energy innovation"} +{"code":"H2020-EU.2.3.","title":"INDUSTRIAL LEADERSHIP - Innovation In SMEs","shortTitle":"Innovation in SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs","classification_short":"Industrial Leadership | Innovation in SMEs"} +{"code":"H2020-EU.2.1.1.3.","title":"Future Internet: Software, hardware, Infrastructures, technologies and services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Future Internet: Software, hardware, Infrastructures, technologies and services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Future Internet: Software, hardware, Infrastructures, technologies and services"} +{"code":"H2020-EU.3.1.5.3.","title":"Using in-silico medicine for improving disease management and prediction","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Using in-silico medicine for improving disease management and prediction","classification_short":"Societal Challenges | Health | Methods and data | Using in-silico medicine for improving disease management and prediction"} +{"code":"H2020-EU.3.6.1.1.","title":"The mechanisms to promote smart, sustainable and inclusive growth","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth"} +{"code":"H2020-EU.1.3.1.","title":"Fostering new skills by means of excellent initial training of researchers","shortTitle":"MCSA Initial training","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Fostering new skills by means of excellent initial training of researchers","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MCSA Initial training"} +{"code":"H2020-EU.3.6.2.3.","title":"Make use of the innovative, creative and productive potential of all generations","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations"} +{"code":"H2020-EU.3.5.1.3.","title":"Support mitigation policies, including studies that focus on impact from other sectoral policies","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies"} +{"code":"H2020-EU.3.3.1.3.","title":"Foster European Smart cities and Communities","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Foster European Smart cities and Communities","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Foster European Smart cities and Communities"} +{"code":"H2020-EU.3.1.1.","title":"Understanding health, wellbeing and disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease"} +{"code":"H2020-Euratom-1.","title":"Indirect actions","shortTitle":"","language":"en","classification":"Euratom | Indirect actions","classification_short":"Euratom | Indirect actions"} +{"code":"H2020-EU.3.5.7.","title":"FCH2 (raw materials objective)","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective)","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective)"} +{"code":"H2020-EU.3.7.3.","title":"Strengthen security through border management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Strengthen security through border management","classification_short":"Societal Challenges | Secure societies | Strengthen security through border management"} +{"code":"H2020-EU.2.1.1.2.","title":"Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing"} +{"code":"H2020-EU.3.5.5.","title":"Developing comprehensive and sustained global environmental observation and information systems","shortTitle":"Environmental observation and information systems","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Developing comprehensive and sustained global environmental observation and information systems","classification_short":"Societal Challenges | Climate and environment | Environmental observation and information systems"} +{"code":"H2020-EU.3.1.7.10.","title":"Cancer","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cancer","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cancer"} +{"code":"H2020-EU.3.4.8.2.","title":"Innovation Programme 2: Advanced traffic management and control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems"} +{"code":"H2020-EU.5.e.","title":"Develop the accessibility and the use of the results of publicly-funded research","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the accessibility and the use of the results of publicly-funded research","classification_short":"Science with and for Society | Develop the accessibility and the use of the results of publicly-funded research"} +{"code":"H2020-EU.3.4.4.","title":"Socio-economic and behavioural research and forward looking activities for policy making","shortTitle":"Socio-economic and behavioural research","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Socio-economic and behavioural research and forward looking activities for policy making","classification_short":"Societal Challenges | Transport | Socio-economic and behavioural research"} +{"code":"H2020-EU.3.3.2.","title":"Low-cost, low-carbon energy supply","shortTitle":"Low-cost, low-carbon energy supply","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply"} +{"code":"H2020-EU.3.4.2.2.","title":"Substantial improvements in the mobility of people and freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Substantial improvements in the mobility of people and freight","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Substantial improvements in the mobility of people and freight"} +{"code":"H2020-EU.3.5.6.","title":"Cultural heritage","shortTitle":"Cultural heritage","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage","classification_short":"Societal Challenges | Climate and environment | Cultural heritage"} +{"code":"H2020-EU.3.5.3.","title":"Ensuring the sustainable supply of non-energy and non-agricultural raw materials","shortTitle":"Supply of non-energy and non-agricultural raw materials","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials"} +{"code":"H2020-EU.2.1.5.2.","title":"Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","shortTitle":"Technologies enabling energy-efficient systems and buildings","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and buildings"} +{"code":"H2020-EU.1.4.1.2.","title":"Integrating and opening existing national and regional research infrastructures of European interest","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest"} +{"code":"H2020-EU.3.7.8.","title":"Support the Union's external security policies including through conflict prevention and peace-building","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Support the Union's external security policies including through conflict prevention and peace-building","classification_short":"Societal Challenges | Secure societies | Support the Union's external security policies including through conflict prevention and peace-building"} +{"code":"H2020-EU.2.1.1.1.","title":"A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems"} +{"code":"H2020-EU.1.1.","title":"EXCELLENT SCIENCE - European Research Council (ERC)","shortTitle":"European Research Council (ERC)","language":"en","classification":"Excellent science | European Research Council (ERC)","classification_short":"Excellent Science | European Research Council (ERC)"} +{"code":"H2020-EU.3.4.5.6.","title":"ITD Systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Systems","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Systems"} +{"code":"H2020-EU.6.","title":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","shortTitle":"Joint Research Centre (JRC) non-nuclear direct actions","language":"en","classification":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","classification_short":"Joint Research Centre (JRC) non-nuclear direct actions"} +{"code":"H2020-EU.3.2.5.1.","title":"Climate change impact on marine ecosystems and maritime economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy"} +{"code":"H2020-Euratom-1.2.","title":"Contribute to the development of solutions for the management of ultimate nuclear waste","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste","classification_short":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste"} +{"code":"H2020-EU.3.1.7.11.","title":"Rare/Orphan Diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases"} +{"code":"H2020-EU.3.1.4.1.","title":"Active ageing, independent and assisted living","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Active ageing, independent and assisted living","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Active ageing, independent and assisted living"} +{"code":"H2020-Euratom-1.4.","title":"Foster radiation protection","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Foster radiation protection","classification_short":"Euratom | Indirect actions | Foster radiation protection"} +{"code":"H2020-EU.2.2.2.","title":"The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","shortTitle":"Equity facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Equity facility"} +{"code":"H2020-EU.3.3.8.","title":"FCH2 (energy objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives)","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives)"} +{"code":"H2020-EU.3.2.3.","title":"Unlocking the potential of aquatic living resources","shortTitle":"Potential of aquatic living resources","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources"} +{"code":"H2020-EU.3.5.2.3.","title":"Provide knowledge and tools for effective decision making and public engagement","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Provide knowledge and tools for effective decision making and public engagement","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Provide knowledge and tools for effective decision making and public engagement"} +{"code":"H2020-EU.3.3.6.","title":"Robust decision making and public engagement","shortTitle":"Robust decision making and public engagement","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Robust decision making and public engagement","classification_short":"Societal Challenges | Energy | Robust decision making and public engagement"} +{"code":"H2020-EU.3.1.7.2.","title":"Osteoarthritis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis"} +{"code":"H2020-EU.2.1.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Information and Communication Technologies (ICT)","shortTitle":"Information and Communication Technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT)","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies"} +{"code":"H2020-EU.2.1.6.2.","title":"Enabling advances in space technology","shortTitle":"Enabling advances in space technology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling advances in space technology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling advances in space technology"} +{"code":"H2020-EU.2.1.1.4.","title":"Content technologies and information management: ICT for digital content, cultural and creative industries","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Content technologies and information management: ICT for digital content, cultural and creative industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Content technologies and information management: ICT for digital content, cultural and creative industries"} +{"code":"H2020-EU.2.1.5.4.","title":"New sustainable business models","shortTitle":"New sustainable business models","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | New sustainable business models"} +{"code":"H2020-EU.2.1.4.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Biotechnology","shortTitle":"Biotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json index 058ce88770..ca2c0f1654 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json @@ -1,17 +1,17 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"230066","id":"883730","acronym":"SOLSPACE","status":"SIGNED","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG","frameworkProgramme":"H2020","title":"Enhancing Global Clean Energy Services Using Orbiting Solar Reflectors", "startDate":"2021-03-01","endDate":"2025-11-30","projectUrl":"","objective":"fake", "totalCost":"2496392","ecMaxContribution":"2496392","call":"ERC-2019-ADG","fundingScheme":"ERC-ADG","coordinator":"UNIVERSITY OF GLASGOW","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4"} +{"id":"886776","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3"} +{"id":"887259","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"883730","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json index edf83fbc8c..ae1b7cb82a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -1,16 +1,16 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"954782","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020"} +{"id":"101003374","programme":"H2020-EU.4.","topics":"WF-02-2019"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz index 71f132ad16d88afcbba31ef4c7c6206c8e698c1d..5672d6044f2b502ef499f0c272222f5b9435fc19 100644 GIT binary patch literal 29456 zcmV(zK<2+6iwFpTP!(YS18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUD@^OOGST zl_t8szrs+9nYi2)YN5P>78iBVi&3q_j6b4Za?d$t?r!Ez#?zcNXs9cL52qb_96OKieCNM> z6sS;q^uPjRrj|0I*DxsHn zc!ht@M4T}-Vu?uSDo*7v68LYylVHMl95NZl>WXJl#ca4_Jc?MF@u`@oC=^NBVT*}` zPs)^~0=}NfE5S0wQWZ#%Em{r)-q)nOLY~O5@uy z01z&ns3bc#62jjRkHb4z4W4DEUB z#evEb_zf0-cq@<+1r+fZUy?CPK~kL9Y?7#aJV60wJTgN!e-xh)VXnu}apnQnBb1|2 zp5ipaYM9UO4S6a;#sipE_-z5x9m70DOZ#^NSV5RDdSEdI1y4*EpTiH#6UpHxf(egf z5v4nuL$Pfy@*oiKq@j!?Y!E$a!SYm~;P_7vCXccd|D5N;NCwezZG3{QdeohKsTIsTF2(ri0G)k4x)iZdC=IlL8*>60)toEi9Q4!Z*0 zg(r*nN+v4CW6Glrn+P6d`nf3|;R%zoIXrfE>pHY&vtN9e6Cf=R3*HHJrm{5%n$L>bSrghorcbE$HgraXji zrdm^g9!WEp;WP*BD`SpoT7vZSHB`+aTL^(lVWLz7U)nf%9r!DRCnWmZ1u#8PsjBVi z-`$J__KfvGqH%)isu+}|=W1t3rfH4}ajrl^NTdI(0|wtz32sW5QCU8>P~lJX?1?nf zU%cW`Ug%K#e2@rGy|l~Bp2jJS1l^e(8XoF+P-XLsTdJ&FAN`WE9o_M(JPXuJ5A=#B zQsr<9N)d&m_>3S+xER%}_&Mv_Fi4I=gdfC5gFca}+*^7maNS!k?QB6O&VtTBOC@~rdIw&z{N}MR`5Tw>AphK79=>gRwP}j^v zD0z9^=!<1Xlk(iX3T-EA|rUjA}c9ZU8e( zv4{s#*bt+UB>Ca(D-~U76Avc0m&27Mz~<6l-Sm4qnvhRnpFIEM*@j6e_c*M*bF#HU zDDRA2jr6%|QxCw>ipQ{lVq86KM$aT@P0-&oW*jO^a#Boez~Oc_ngBi%r_CjXcaDn} z_u)o^&CE^}tk3Rbbnf)?1*`#zx9tk959=;pXlLBcb$c%O%#5OWVwyHYq!tVo3JrAp z*4aTHK0MwiU-jTSKdyRIJCZvdN*DGE9F-+24$&d(IaHZ-Nw$29eSA7N{7GwLb*}gx z+>fAo(+SSf-+uMG-dUIJPq<9dzy0dl`grlpfZlBFy0Q=Ve!q9T-(zq4=dW1z_<$Xr zoUy?Pyn6m+ePjc3gDs@7#!*oZw9(SqMA{Bj|M2kS_@H;zV{e}H_V;^l-t>;&by<&f zk51q8yZAb6i|*NZZ@+)qeb+xZ?loipuZ4%W4#jyfR@Op>TES@>o?I02zAdq2)`nQ8 zXD6rKm%TF->L0aGupqs2w*Kv#?%B)!Ss_&)r8)c@sW(2LF3`!Y=mT^#5*C8X)RXZ_L@V&Q*jFEs zalHOcrf6NWLolg>3Ao`YSaR&+-r4?7zWS(fI~DfySz#6IgXa!cs{Vp2W+>Vb)6)HP zf1URa`}77E2g0jP%9v<0SK@A?u4(sf=>5{^6o$ zR!hKOXsy=o!G{e6!_^YFAK`x<*2~-NwneIQR#;Gb?Dl~@Tn=w)uqFo$zF#WA^KxA- zm5~R}5bMWR0*wDL7|-kPV_cX1Bjxd>0kvCgN%S(w<1m6>iP?y>9{gkdFGGQmzA;tdLKP_Ri#q6z2GSGm0lreaby#z6$$h;8dDA4-9Pt{(3Yi_2?#tDXE zM4)0dTUtK+vdmJzdwWiHrwx7Cn3V2VT&ajBbiT+Zmn(RVU{@kZ6^Q#pk6Cgi;4f~5 zz5WA!Ae89kv6)z{v_i!XYZ>SvzNf*Pj%`fC#Y|oQ5Xl8tQW6ZVDFvlhJh+r>tkSs( zc`}tuGWh59uSKvD*S|@#1!r?LPgY`u-@rjH;2XCXVjk_y`u^q#CQ*`VCCS{xg@lz8 zZ2exZB*>T13pn-b_u2YA`|F?JQ<+Hr`WINC_5TEe1r|7Zg5TKNo^gEaa(Yw2jj8Bi zPM`=%S#y!W3MH6!eaOhY%I-OOXnT#yq!YQd6SQ`ofR^TTiuJJ9f97mio$8Q>VEf^@ z7U@?XG5YS+`h99{-|}qJ!@N(3FTLN4Vu01Oe{#%v?6gPryN+V4fA#P%hBrm@*)}7c z9re%I0gU$oLoe%pIb#Rl55MdUz_`$#c=+hO-6qcH(J<|@ckJTeT_4QV-Vqy|p1^;> zalig=1902Hw?DwycVB(D7~V9gBgUIgVmL? zm=h!bGrI5u4ukHX1|CWkO1#e{s_G?u2XtyG;XyOr2-WM66aWIe3N8FZYt!**&9aK0 zP*M|w%U}SJ$g%|g)P6Xp)LRtEF-}hNPQln&gem43Y`n^kg2x2-7>t6A6B)KD72ae!58Wor|(YOn!13(fa$b z3=|Uu&LC8c6jJf+7Wd#!9Vcg)szjg2Ii`^DP$mJ4%vd&-S)(TztYX}wd!PsBk;-^= zn&C`=Of$}U^f@q~`qcy!u0A(AwJ!J(coArO&k-|7sX4u(t=mK!P9+}55T*ibeD^@E z{}&xsXJtJdOpo$BbFYb77=nymRFnvH;E9aU&V_%h|D4GguS>I^s5IRR@{CXj^m%>l z;XmOE!baM)wLH&U{{{91y4j~L7aQD?FUKOYl zRXUSlo+PlhBE+ku`Ap%sMIkPWwnzk0lYxo%BYcIrVE=914;kpl2%QOSk#KcHU zv*iMvHa$&^@EQr$l5PBqwx&Dw6s$lnsaO&5kStloI^Ntc$SQQtjFjJAm>^zhtIG2@ z@TZ_6oXvMG0)l^m)=trq<)HghHd_Dt5RX_&`hApx<<-2gN}!_9D5OIkWGcbGjAeC* z-xDpWJ4ZjfOV-~naPqHzP%+#eY^vfZ{C*rPFX8VLb_@c?RTQf|3|alKT)=)}={#92 zFZtz%Z-ONo7Mo4Xa(KcjOwF>Ylu~)q_@GcKe&cLw*f3kOs7EltezndhOF5IJV`5Q45aXOP3 zsO%I2>d8dN48i-sWCHsEv^aYM^5{=vvN2nf0rx`kPIVdMr(n_adBhB5m7t{eUPx7a zzqLvB7?OW_|4Du+QV>m z-VtAc<(R1=_Fcrso8!K9w|djY&zg3?k+@Wej`M45E#7FMa2#3^AZ4dt9yKs;y?Dao zpvaS0fO;FKJ2>jKB~jBoSicW*sgsdts4Lh?G@$Uw`n@?+ut}mPVm40%?yMq!f>D?} zlaWk$oKfn^d>q7|fi1KCV+60Dv}xa#RB(T(xk!Nmz1&k0=T_kPy6x=z-U^efKVVyvGfG}4wLYE0OG3Mn;>qC|Snz}^r zOQtfi{6SMBQ2Ac)n7xfwjMoXcvwy3z=Zc}D5v%Lp2Vk**A-1x;8bR)h4DfT3MdJGR z`i~A9@(>O*XS4P9$*RK^>+eH${d@Rs7=8FA)UYSeiYt`5Oy0L=ZF_dLT|aLD4%jqZ z5h`I7qCbbT#g49jcgjF8wpfj$58r|XSUKaKlzf|Fo$(RO^~#`5%sAG6z)f#k9B6_F zc6!wJB$BtyzCkB7z(ufN$9c9YjL5el$`YQ>&{S#|<0V{y!p$<{Ma~AaSRhcL@s`ZN zO^YM2PTSPNht3MUhkMwX?GO~rsW+>_+r6nHKc8-m{5YPxFy&z&3YN;{+K52FF8dhi?@qg7QP&jv#(V!MU!+Gnu^}b$ANhqm$!G7YiLBAtSAeWuc{oDWKWdU zY^KVl21ip1k{V|L0xymYtrx>m7Z0{ku1KIlYxC{P0bHn6A|_fMs5=6`vx$URcvB zaV8VBTBu8X)8Gp$FerV6m?^z}1} zqW_8rg8tUO)Yje8n1zA1ukhy4S#xQ`)`nRO)>4jct<-lBjnlZlOr<1-{?p|1@r#^A zaKg}fn}I6~MrXs6e&lp`g*`gcajh|JnA%kgD7;6KXNZXXlXCq9v#-R>B zPqn6yA|Zsh(IJ9e5O#e1!;o}l8IiuCbZeH*semCy;Ca7Vw=^|B=XvEl8i`M98NeN0 zrAHjoCwN7{Gx!1wDsXc)K+83O6Xxvc^HF>5%+p55@Thx%@$LTc!NuVGtdH0`n1$0s z&Uj8%GmIKj+o76qNsdkGT5Mnh#_RWq{3X(Eh^(^GB18=B;UwVOJpY<;lym*Q0rt9W z0=$~n&IBB;|K-(L@A#m{&Q4Hr1R0i7p-apr2_2m?g%`t6z@k{Ij5kjJ4Ta?GrR zYmAS85sgI}OcKl@!3@3?$u!N>Xmsz5;I)*(wk!_&2MvTV485!E1-xMq^neAP9SaRu zSoAAy+}%W`DnVd}5dckhIctcyZz@HJ24owm&ntSt1=!2jiKeEeR!f%*rfiTUYIY@( zAuMeyvK4DA)58!%ZDrlIrHOfzhaGV8V7-({w5|Vrg!nehu4!ibzom~F3FFlo$K2Q_sCADuT~7I>x`@X%olbks zWTT>b2fD_-nRQC~wI=47>en*+;~)25pZ|n4cDMBmI-&axwa@w1iSv48?{M1c^1l&i z3a1k}*TD!A-s09nz@l-kz>nYb`sZzx2|ue{*kwYxx}$C3nmOryHR?*X)-G84TG@Z~ zws&@Vas2w6ef3d&osb`@7@BD2Chst6uZd-6&;3FqNLnB$kB%0P$%^BxknlX1q_nBv zV?%+SwfvrT6C;d;EibVevR2xOQidy# zeWjC7Q%V!5g}|Vhy8a=Bg}na#hkr78IjqBPZo*Nd3EH-a=5aAUzr$jhie4x&7jSDesp=;ntC*M7+o6zXeyXT!Cpza%=$VlLA(CAZCl*Mb~pE)F&= zV#A)+Fu#aRz~;_yNIO}lJ3{L8vf))qCF+Qx!@Az}cStl!_cP{~z9^|3>r0RqOE86r z*B6Lg#NNEXAG4BPJuGLoPB~m=Gs1%#SY|WUddot3jiow+d#%&RaGTw8NM7fwQHr*H zPg>fIyLK?*kl^bAtIkrLUNUuO6m;m|S9o44J3~VW@i$b)6sRg(Y@=+ppla?>jpi|q zTdNb%d=H6`BRT{}0$EQ-4t%u4nU<*1#(F-mUc__O*15?p5M>Oj@q;#p5NuBX^%xI) zMrPu+Qucde^xKr9J6B5$XtObFayHMS6iIH}`mINc;`P?K{r>F+P8FJo2GuJ>O->qX z_3MS9%Ob3mQdp?znBOTb)Ly0CTd)fkJX~=EtYD4VG8R*-P{axZ>uMHBcGgrSRPI7B zmjXqaqmf)y*Hr~DER0Nr-ExE3d|x1`$E)3C5?lZFuuprStQ)s0CsW(60d1A;@wg==}}v{Yk6~x(q@ZU1rykEoGnVMk}NAtiqGk#gEub4B~!!Yg5w(B&nIZh z3);MDg95D%PSE-<3B}~C%e8QU%G_hhyr_x%tqSFB5*yL3>Ndl(y5`~0akV_&R)NMQc%HwPZA zrJ6H%fG%#e(1`t6zb9WuVO(&ojZeJm4Pr2&8S#HPOa2 z?ei~K{~R`UWN3S-QmbO#Pq>U0981ZPD;eyJVY;S6H>)7)PFQvrQWokWxRrK)Fh97| z*ejzf@1jLLDwNvkdc%p@9V`|EB&QX2uDX%Z4Jkq!#E#IkCwxB7mxyjpb2Qw`Vhh3k z`lt18%_lciTWpAD-uIGhi(lYwn#V4#Njgo2w#0;|arKS4`wIaAP8zyp%i?=fDlR|j zmZTsIgGM3k^^zUKsU|8v{*L2sFVXjN5ylG1q^!Dum|HO~2DTgq!7#RvC{rY~#2RK# zDL=0Vvqpk|8Xi2gyd?1WWiBk%$voCCWcD(763c6uNUeE*A>r5MA;l=D*cAb+nnXa{ z3}|g+bnZzZ&>lC!JFf65)|mJOL#N@xHysuu6cGhkUdA83F|NXgZ_UQU4f)}l#|q_j zD>_2GCQR~p{R1-Tk42e3PgOD|e;zC-kQcmJM3`fWq-SUnHYDHfDV5h0(5mQcrix&tglLJX@;!j8x9zu zM?w|)U>_QW7;kcTtuLu@R~*zKV{?`3@NEv)4@O{f&0W#37#>7j;W3%r2=NW9iZ`di zSjQ_l*cv=4>X08D;qzMI=5DT%joNGxP+;W_S^oh~EKD_p#4SD|Uz^QqjQkEJE4cEo zOI9mhF=hx;Kc_T9hA@N~$|cN@5g4|Ka}Yb$Ii$jIq!J0x)Y6TZ5dCIVKGvtqTj`$1 zz47!8K_rfmy$;x#SH|mR;rcysH_Ru6p@}izz4iZ-HNnuEP-PxBfvxfpAoJyU?;HcL zZQbNYDI@s1SUaeRBsDSIoZ?yO3L zrLMEr&@@Bfe%@~q(mxc=<`D;2-=2n}p_-j)HXek7YLBbwxI!!GIK@I_*-$J*GE7J7 z?Nya zg(ZyPFsCqkDk+MCv+C&Ut~khmP63o+#bpD}+B95mMzeZ7^6oxvw{W2082UkFwrd;0 z#VAZEncXY+>-Bq41U_eYv9I4h+BEcddw%Y2=2|L&em>b0M(mZM`skwRkUyx%+iUjb4p!@%Clz)IA#aKxZ`duFU~dKH4ao=O zM#vKuWQ-zgM19pz8;=1=c{DvgzuokFQ)D1Xp_qvfnz^_%M)!0HXtixsuDe3uY0Ok! zA^~;1M@CCcl*Ue~%;zk5E$G|?C9|`64bSXm*p|n!xvjc3=Dgk^J)=aD6mW>*Wkvc% znRp2|#1I@wxc=_qHh2uUJiU|e$YF~ium$&8!R=9uA6>v9bubopb9%hq$1ZEl4(l3g zHpl%;60ZgFw?QoLc`m#b|LlHoS~8kEu9$t`oazu8n4#;EUx9O!Bfc@D`U(+0fWNL? zR>)t$xCbNf;7I#Z{~+yg_$XP?hQQ5a_N{4f#wKy+GpSOm^4; zbzBEm3BG*&Lky}c)Hu5j-ym1`RB0*z_+ouJ?t#Dd9ISH|+o7&Tdj{oHDqRJu58sAl z%h;)0TX$o+m$G9jn1Hs9_0A}?H#dJ|i}m+Ou-ZsQzFSVEHmA{wDs?IZe+;RHsdJ@}k|>yM3yENltkBmf1d2bY&tZ$tC-?a-%{=o2iWBgdr(daQM%&U{|ywx~;MHp6mr!MymgkkOVvC>>NDIt4Zvi29E z;#V(%N3h7SxlO;+vr->**jtW{9bH)^U;8hlh(_=*Qc~DD)iB^ZV!gd;B4@pKKsn&h z#WiN(hShJ2%w%&6;$Ve5JY<$Fiqcp#Ri;#Pl6P!UxQ^Agq#|LhtQv{IT(xMnt-n(N zi${&UHSgJUC$*_@{rhNB2J{X&Ca?CTHSW8CXkX*R7ROm1?v8a*(F|H45e;qEGd0;v zuXCs6X_vPFT}H<%FV*XVGJhVu#-H9P7*+(FQ$kc=WdYUFkA)u%Yf1V$1gD55)AMe+t`?R z@k0Lr3M1KGtnV}C+}0`G97X8r>&>9KbVVSX%UZRrVYa7_78pqi3Rtzfgg(+Vi&z{8 z<`ez%K?isjT)ZwXdc}l=9DfPd>;9r84SWi`sN7!~HYAQEIS{X_5}HLHA8OM~4n zBQuJ$!xj^?_GKI-f~O`Ii2cXqNKhmTo&pmtq7**~?s-7m$B08PEm{K=U*U?YxE4db z+3oaJZuGRCe@0T`!-$XZnrIB7{$^WBFE*t2^dLx%y(VZs*` z@!eM+QM3;h2!m<#2;GMpb}gGn94Rbk3_ei)dz1)KZgt&7p!iNAf@MJAH0&%^fRzz9 za;0QgB!bNV6C~IbJb@*KBZvBBcLiim-D}v&upfyKjxK^_`!URye(@RmxPR1t*1$Zq zxZ~_g;bido^pVI0xQ*r)mK!h?k}l#bF=Aj%*BEM+Ru^ zdNYjgz|uJJgyLIm)j-$$$bR?W6zt5gKIC1Q1j&ZJT&-<~PwS+$2V#aSZV6t3h%5ms z3oZbb21wwFdw7J(jKD+$V#rCeO_pJzu=WCUnrW<2RcUGe6r4b@AiPykY+;U(#Lgtz zd)2RNCEL-E19NutDU(@BXNbe$a~p-2uVyNNDI&@MJg>wfP?Z^J-3}^Yl&IO(0sS~z zz4wp%)RcU`ryH!F_YeF3aM9B+MJw7x+n3Cl^?TgEnqJb|R`+Jw)Wja`$@N7nvY&g*dfA0}z2d!T*2qR2(T$u@?)wc0bq+=x*m&SQ&MMbQCTf7dwZM}z%t z@pgOBjOX>dcK`gq&^qt#CaM{}y*SO2;-X9~c&0AOLKPCLc+Z?w+z0rl)snJyy*KUE zhd!n)@tZN^ZNa>bGa)k`!)T}h2};Aws0+IU3{Re!m|_t>){=@qrJEP|9-%~!Q}C=o zZ{x_h9hYl-gyjxZ^|QJi3{$4b05;jd_M2_ZT`dr6BbtjO_KxOf6H@g~Xw%05R6UNr z;fC+TyZur9X|J>A5^7Y59}BkbK-KSx6{>fOY3%5maz8!=eAc3X8^CP$0K0j;k{qL8 z6O!#-P&c0=v(F|)i(&HjUv<%Be%Twq}F~iF62-iagQ}l(?6o^goVZ>dsaANTj04%maM+Y=XF*5LSc~B zDzF%eY$1>kh)`^WeXsx0)KabAmSu!j?1BxLj|ZFTlfrb0hRE3?Jh~YcDYyiZFzK zXvd`tbQVE{3QMr%(K`#PJ#To{Mb5}*d(aZGu2GOFy%=+o4%Y&iv{_Z8A)Stiga8_G%TD%%f5%m)#L138|sPXpbsaqx3nI`z@CqLT|z@w9K5yKOnq_7G|Kf|N) zPO03tX08s*!4bTfA=t*C7_^!bV2bYza~a{scX%t+wFZ{GB!U(KRUGY=N(Xc_1S6db z12!c|?YBF>7%LaKWiEzQZROK@LuRj;>lE$deD(3KK6>9DUEodC_eS9u%`bzW7< zx~uS}+aKF!cynW8TQ`b!OY2*Yf^G~NSYZ38l<-N@f4!ZI%b${|shHzD$utNs&? z1-0su%>-6lqbovmj6*SXIwKfNhaTs|=pB-6ha3)U<-9CfX56G1{FI_Icf7JjV-DpkfC>2zU+`>P(>xpf9I7V5a10V%X|ohq2tx zHY`yw$2{b7`UA!vIG0u9VBYy_ZFxbrTLn}()PWIUP{G)+CY~Y+BJNt8sY%c6Rn6Qw z=0lTePtAnLQXo(_#?;6o;LIaoRU1!TgXmHHw>teNUV}I zczGJDMI^$eJ|xrWwe{`}q3GutiM~%|+;@dvKOVI0H|FOAmp8IW*39qQ7$-g7_Rs6L zy}sYgzSw@dTY#i0IAw(}(m5v&oMMs55DyLr5tjiGk;?Jzf&>lR#(4sgQIvCxrVTxY znqBCK41$E5+*lE3&L%vgh-oYk-N=(A72{zX64e15D#CkNY?X5`li<5B9SUz6%qZCL zT5FwA4{9~jp(088BAn+0Y{aQP)9_n72viPN{Y;wTmX1^Dq9?bYEf2PsIW5{R&EmK_ zwarN8AO)%_4Kbl`qP6+*<^}k;mK>fVj*W;Y+Ymvt0=M|?4sZ1Bv^TdA*toH!2NBk}l9Jfva>^o=KCHtJU@a z7CqO!_?g;cA#1&PK%+8c!^L)kKHbeq@0FDjlJP4ZuXwWo=WRI_Z!5m>s=+mR zBf!Wg`3pAc`7QdjJx%t|azU@xPh&(lTc}^B_`?Z%Arc{*NQN7R=V`+13#nA9djSt& zciOhv9WNxWN2{L7+ksMf1OJU5RT1W~hQA(MMtadpBH)erDv4sWUe2a0AG}21R%yj}mb1VX+0ZBPHmd-Y9ZIo8KEQ zVXs!aop{>UiKlnniUVoC>#9Rxc!H#%LV|PKJM@xxYD=oO{lTDjtodgLy?=bCMMTQk z^$+ogD5KGIh(*pYnUjeLnHi4p;4QoMzn_!i>5OdhA~^eH4sL{EC*Vb3U`yP`k!T^_ zW(LN=lTHFv#@p^SEU&RqFDuw)^OL)EDXF1t?%CMHb3IoLt9C${R_Cthl;kXSR*F)I z;+MIcYtD)?gi^p!@(~XVJQH(2GZ`dmPnWoZ>!54F6&NR}VIIvmQMV$Wj*KPJM43cu zhZ$a|LNJ4oAB{vpB@p^U69EH2RzIQg)E?%JqL@1VYF~O2Im(`$aT&3nH(D;XIEY$G zwNXV^5x`P<8eF3}tVOzkN2`{ZdBl_21M^$5zR<;<1+KTlu+<|bdQD#o*raG>V%>=; zd)iiQd{jPS^M~6K$5AFPBQ4MoEfg+Qqin&osquI4H*5gVG?7I%Rg|3m z9o3xg%_qxLBH?by<;lY^q1HKLSam)t_E9VXt7eWYQmSAnHp?iFiOvTfMf(P{lEf;k zD^LREr|i8%14d$O{I#-DRAUohD`(5OsL?c=_i-p9EMX@*%+%ltI{Wk}k21Lz$~2HL z&=^53RRNM-5dvwHvug--VEDsjQA=3X+)Px2#j}A~WrM6O6#Fwv>isvp?(yLDm(O-M?G7mD zaR1da_Hm_{)$me-BGO$^TcZPCqn*ErLS_?%jzMLWG>LdTtkX^;0M#vIk@D z?W6XoOi^TCuz|TtK#{}N*8l3e@ZCwN6J5X}4FOGtEcaT^h1oCMu)9 zzF@D#vI@&N+Xe0{x`N;?yTL=EdIeQIu>*(iO5ai7aVZQO#92Q3F}g130#93c5@tNe zvw|BHgW^shr=`uE5#H;Y?X9tM0q$eK16`f^8MkBf6)V*jtiOLw*Ig*bk~o+213DET z&Vlp(2#jN&*9!s8id$S2h~g)(F%@+KA+1f!A02#~DxyJ25~0K>%E=z5-AQ(lVY4g0 zV>nNY*~;0KMEVM(vsZbRVx!>lC$XD|uC&?ROgyhOdFle#O9o=oAW1nYwui4Q+5gL29qu)e$ zqZ>!S*W5)|$daIFr$gLG0aEUWP6gW2Mp>He*_K(Vuq423R5k&#<)}j*X;d;EHXQp$8x04>c&kXhj~Wby*BJ)P#!ty z2GsEGC~L(*$C=+1U;^ zd7XYCgZpuuO^i9^um;&$qFfmZ?5zc$OzT;9x2xe~K&VNIaKfLu!p}4oyMYF8&g1_q z3N@5atG~F}U$#X*;B)tqDARPkYEb3q%e5e?G@If`C-Ja0FV^Ar#&JJ;@MGE|xpjGO zN{UTb%qt+qiW>PRz`?aK8oMRsCHZ}?KX8!C@OUfs3sP_=+q5}l=1(nN1Am-$)@34Zp6x8j!P894c;h|30>qg1J&Jy(HsGe@J7l@0nWTUffgWbAJ zHE(YlpGG$zJO(DfWIAOduxd8kj$1#~9{Fk(v`VamoMLvG#-^8Lx;jtUdtN z7TaO8GMj(ew9r5lhiuk0>DbaR6^$Gdc!h?t(- zrLNQPmK~10PRc}?3Sq%Yf{EH`dh+u|yCo;fKsMZ11C@v&0f@ zSAm)kveIQdw&{8`+jusS3GS*q-faTQP1+E@okKlQvJJep5ja{lIP#4yCH^tr! zYb50focTzZq9!w{_4g%Dy?)&cu&9W6yj0$B=vperZSBR`LI%?)*NmO)7}PEPd&=j0 zMSYV~@FC<_Bxp`(2!@=BxWkU%YPdqQ0~Tx2cVCJGKHuzJXj**iiK+<*JqC-)5IMa=OwC^?sme(P6G3241uukHYzt#Aiv_VH zNWfQgu}E3})#ikFE&HlX$eCI#*56+;Shx<$| zbb+#hY0(q4ga_ow>ccl7s44uUnn|9hMXMXdTQRC`AN^_VuEB_QMI~r3E(rW3P-S`T zY$C`v&4)xhHpz&c-94XeNyr+)wR)!oLGHJK6KfjMy#n%6b>k}eND1*L>4t@`ZRw(I zMbJxLQ`7e0M**LG)?h8l6H4&IplS@p*vg>Z=36J*HNe(7NcAF-=DXqXE{t5m}$zK?kztLt{ ze?8$5Y3+RJ6<( z^IzuNpjUSq*L6&4YA(GEIc=r83;N|@jqq((CeWv~^ISp!(eB>>jJ6wM6l9|H;Xd>s z?aBCjyNukn>^r6TUgKhn)M5|Jo0Rm=w(`#UgYWsQ_Nr0bQvLh3-`?RN`n(?KuY4{T zSRzJnly(^1)SJhNaGH*E4pznZe_Ojp-SIo^N$_m*_OD~|G|@{@NU?o)-}=4wB>B8% zd?chVJLtZnM=C0sg31ixW#Uh_{#zDB$PEb}|Gf4jucbL`;Ae&3u6pt|SMLtrXm9jx z&pD^GNMGs(`&13=ra1aT9?%|F?$eF;z8T#fh0}&E+pA8p8B~#F41@^=V;a)l@}wJ{ z)1J^S*8^M{mG#2h`-4Y!Ou|0+DeXz=VlUmE@U~%EFeBW!Yaj6awvZJ+trZGMJ`~wb zp(a0dRrzVU3?`DpLI(vrW4EJu>TKv=Xza~yNAUNKjzn}=Llv|-0H^AtJH}Y-9y|p@ zyBHz=ahAmyQ;c?0kf@>5Gz1ba{=ZGFbMCC*V_p zTG{P5oKk~6GF^&2j8*)ITgOq}Czu4;>F+y!^)Z%DgrbP%VJ7Xzm=nj9ITPtzCe+*L z#~ID1uEGP`8t%tbw@uplfIh3UtrQsPI@4CLEQ2;hS^2IjIiJ>h(OYF96lcI^S;S36 zG>_7l8e{Dcz?eejF*cyX-4F{A?y~Z?<{uDTy6TYO}LwK!eg;1T@oT zQg;}YaFk11&9H5HC(GvEkfXipm)bkJ+sgGY)x2l*w%f?1*z6fsxEfWQlFC>}ss&$a z96vTO6BNxM$`8%1gr#&&O|^_muwl{Ntm<~AxfqZ>1ok)K!g-QNSTxP{AX80Q@wE=L z4AHqXnRSDF&F2}MmYtr}Pu+`c!FXiKhqGKM3T z>Nj2?U_6R0?ltqZ5L&elO{@$orZ|kjDqFn3SiKv=VMFZrr?E@>$YI2!7ZWZs|^BWtMzcN)o%~q-; z&vvAl{^IU*&|v*%I%Nn38S!BPPtvJH1qyZCkP)T_%^oI?+r2Vtz8CBwP zU>VreVu`@ooTYPh{c91dVlh@VkbP6D-1}U422s_n88OY4i}JN1>7F|`UeFs zq_8An#i`j1sMsk2k(;=(VD;f!f*(t^6;0-Uaoe1dZi;qJLYN+$gUR}jDFJ}+1#=9@ zRHo~Os0R@plqcLAfhH6UVOSzhOg1Z}azN9ng`um**X~U7gf`k=rdV=0tU7{8#$P3c35JWz@{@rN@-c47*N{rXvQx7mW z8!N)#tKh>o3`x%~c|@8*B4t62W@lcXg(FUH3QkVf;HYQ$@a>#~n&|x)w(l6v0d`nH zl6(SAf(r#Z8kZLlSvr}QBbh>m&@O!I6vis54&xGwhP^| z&bH2ThY?x7L_k{^T!bx;Zj#}|F4pmZYv(!mR0Hd!bt4h<_!Veg)W8)ExYFmkOV<-v zji`WAP^+!=dkpwV0|#Zp_5VZ@leGE0zRQ;dXmrHUaKgP{7@Oek!LCOdL>O-@S2^Ek z@akBk1?94L-Y}ODZnZWqgW9Y=V3dtF%VFy(4vx??7hq``vbQ%yZEnnSyNefrWRWc( z<6v5Z_*|)MlUWac{T_MfXA~H)%y96C*pOc;GaowQq4;blX_{J`AD?EnOLo(0U*`c< zTQ*vh50K8NMW5Xfa97PKQQT8_3B;$`kEu#$*u1n^DBG0YtD)^8Dt6LwwwD^2ZWGjV zqo;PVA7ps5>1D;@^$!VLM<|;PK95C;C7Un_ zF+2e$t}gPa!(H)9Y-SR$NK*8@tVG?_6ugl`*r5nTG-S;y%PUP}3hPFBT|8|8k~IZ- zz>~&#dw^Kp?vaLA7Yh}|a9_)!%H|UM)SN9;7^C`M@l*}h-!DqZR`{|9i9A+(*JIh5 zl8*}e3i=a4v~)O@vVNIHNxJ_^f)BOtBEW+|;6oy&5a+kLAN3;3YLSZB}21Wc@=hMU4$=zeo(dKDt1EeFzha<@Tbw*f2P? zg5OSwilF~kpWb8suNj;|?gZL)e6E%XT|+Q5PQN_rRj~@~BoW9@r%|mih0>o|{~5;6 zFgorUv+(KqZ1rf|}miBdR?#}ploM7HWE2dB~X-0?thPe_+faw%w^+FeBh`|waL2v^@nc)q&3Ph93m+5ic|%ck(j5e zr5Wxr%aI0S$V0ve=+%#5XrF*vO5*)QlO7Rq2Z2mh8p7jLq8C2l<X-gcQv8o-%5$m#9U z*(j&Vuau;mVGRG&F=iBZ{oZtg!8~`aL8hkHpVY&x=}6*-(OM52KvlGmw9Q4kr^a|T!hiINt zY~V~`&~pe!4s6djjIi`8H1@ciMv0l@ite#9p3jhiw4vZPgzCLn%ngEp$`wd?oCvup z!WbcFoG}Umeg>hXUx03+kTrS{Gnuis60fSpvJLjT>~+B}xhpX&^%)sMN2^FAHswfs zsNfAOxd?)Z)O#2#CW&<+$^@N z6tfs^Juqvq%nv`aK#-My|KV2`_^&31%?&bnz2<7*>HvF-qkR+E1}Pz%Kx>G^f_nss zlIi-r(eo;fIqW#Y#yQbG6?B>4(_9m?IoehcIb z!ye60?oASTReQBIdcFNY@6FCk&d=&hX$~7$g5lS+RtG3b4tf{AqJ~)5r8iE!Q}C9u z&E5kZuscLcaaSNRQq8z_qD|LUr4SkU;oC44W^71RJj}yo41Tp@aO~mKM^iatD%5z( zCF+qJ%wKl>`$UCmA=7CiVCyeP*B+qqj5IrP=#>oD2wp*ZYaA=&^#?ENs88j1gq_4z ztjbMPaqI`iuG?g>wvGcGfEqCon2Et-Y5p9My^;pDT+&sstl50csUMhk z+Z7dl=Vn{yAxw^@cLRYx5;)!_SFR+@*Vc*`n2Ao>xCwk#JcM=EsC!jtq{vvdqcnp zpDX*@7!_`)A9gLwr_EqZMJzj@aR&N0;c)k>QWBao&?uN9+srwls!N$@)+( zS*;;#*->@(j4d^D^_*C7s~UN`Ud3uCn%I;%nT#cxb0@$5L-SIE}tG8*Ey_grezOcrJ;EB55$Y5i>M|qk`ePiX#x1a)M>sSpq^q)SCRRIBwDH#O4J|a-wV)j| z9)U_oVDS3zQ2t|Mfb&R|oqF3r4%d0cQHO9!SgmJA^_p+Ybp zmQZV?3<_jGz)M~Af_<-~NU&apV1doE4cnfqSai&c*GiEz_>+$lS*qFkMYj{_Uy23R zSry<{Dn?6eZm^jB0#gOaxJY(m|IBQ!KtiVklqEhfdL0HGE@YB9EKz4_V#vc>1m6^_zfH8F?5fW#BwUzQBR+UZid9?z#<2q+y+T5%rWN32COiVdKdYNS zMR#TjrrVhYJ#eclq%%O<)0pAr(mdAs%I#?XA|g#dQ8JE+gDZ^}62@(nNE#`=&P)Tw zjzzH9G2uV_Z?Fu?a##2ZpRy~yz~`R~-9t6Qo`B=A+ycFBWE&UYrQ8J0Y-yQm<33-g zL<6BQUl^l<0V@frJX|d+5Hz1vFlhLvfk7Jy8d12+&okS>WYjV&=eOPZho!XeI-Lvt%G*?`a9CcOs5U7@$>!dbdSn( zlgp8TWt>PDL`s(GMu$U`9tVED|2=5R*kG`HqfRxPgkDIr7iyJyGo98clO0 z@hm0mg#Q`I{8=D~TbxULPZU;tcI3}I+q>T%LA8_q#% zMq*=dc4t)dnw{B0N0X?%*6(i%k9jSHu8(nJC+r`iiCUxoUSGTB6pApl8&S!tBZ?Yd zIQKSQE>JLRM4wdk9%ukvNebI+j3Vu;y-`j&qs8D$O|2jDE12OxP$-N{wyYX3f}2AQ zejygea-!_p06r_wRk}vM#PsOA6?~nyB+ZsEA2*^Dl{C((%oHdQ3WZg{KG<7SKxaq# znhZ%d#Q8jy)cmwzwP!FOjpMW8#}=9ZgU3a%wN`W9DXe?yW1N<)?{r#0ZP!MVBI{7PdRE){ zs_kH8JR-}|$S{tHC7TGRAcq01kMc1(FIjfU2oR_r7{~~^8WeOVD*{}poVwnt0%W7X zpW3}EjS`bupx?ti#W+vY2C{7! zg%g2Cedg8&3y@2K{*-25GHCb$4-&;|ywGiUs%Aq3nj=`z{4^aiGk&C|1T~XBS?aMi z8B(|BS#Fai5`q5B4S1obH31)m+7nlrhkxFUJuX40CyGNCa{2J zi7nEoc7Z*%Xc05>PmE{loF;OJ7L)#EZI4VSjvPPF zXCg8|i>-VUj~u&4st0H6y$HPqjO>~1hd6|a>Y*$eCXMq*89aMJwGLI~&w~)9M--?$ zCUC-&q8T3!>j|4kgm;5m6d_+9*WhoCu_l`wvb z2z58H8F;KbvD~o`(LAYk#CH=Cs^uE2jKUo<4ef=d2AM#f&eAgi-+zizPKDZUWiq9A z%0lSZIw-Z0qU2;cUw@Ysl&1HJN_j1Uc7%GjkRQICrYlP7J9U4&l+bic&?Ur+biV%n z`qv~|fnhM+8LoeGFQL3v4A+8)K|=R3*R|b-Xbk1aOx~mnRLQ6aAFgCU0!82&_=>#3 zyvn_V@><&c+|>G zEsqco9j|!UERMFiP_}CxYzX|G!rdgaiT}h2-Qog;U_@d%0E4)hpRlh-tFP9G*J_?w zq}Ixw6hnms&b!aAZM-lr@k=-wOsZ)Xio4w(9#!q5?lF%C@Gi%%Z&4Az;zpd!1-L2e z|Ch6+QI_D%VSRntNHAN}fHKCi=k_e78X&q`Z!T(z`3PoY{DduRzW4iTTjg4x)*}I+ z=`_N#f_WqMbEK$~f`yUNP-<^dd5mw=X;ye77rc~bxj+&sJtX+o_5VuN--lp47#3B3 z5_wGW)+9R6+*5k3OfWFbho!?n@fw4auAt0j%>FiFTN>gv9wYa-zdpLSuRid~O>C>S z%}q=2D(GAEKCv)#5BD9n*IsAVxNqx4(le!kaP&^S zsT_|1{cU$28S&-#DDMJ8B=*wK3=V^iv-S5^UbU%LBgLU)ITkr~>6pTzWij%f>0Bzl zdVWKOw8fcshi9lG3nDXS_i?HZ3Ol-2m8kW;dM%n1?vOzpYKk)Kka|NO<@*fRtG)19 z(_XNs$XK2CpyafCi~t_JzQzu|WfTt{vsZ;#+t!-!*CtE@U2;>h_Fe<`qb_o+HJ|O( z8Z3k|Atchyi{XFfhNG?P?li%6*|V8o&#bPS89^HZyZDz-Ihg!Y(T<4}p4PX(i?Ykc z1;Nd-^$*#qfU96>wl2zHM|}NnF`vw~1Z4-<(E_Dxx6wvp_=AF;EHwkb;=rOPFhk7d zST+kJ?l5GpF-lj{S#5eX39}}klX|+b5NvZ%@67}D8Y!;LaZb&1tp>Q%BUGO}JW-}ws#i0)G5a5`+_KdAi&L>;<(p;-kABl%a}cr ztpB}J=Q0rlB4OuhE`!|?`P43j0BagFBvl|NF(DXgQTl9(EAU;yB9x3E^oj)cAVKYn zCNaaR{1i*jLgEjYs|EIn#y;~JKbfh5H)}D$t{?%A3ug!3gFe*EHhAXK6wR5=c=U zzJXz9P$MaF2)xFL+#4ws8cvXc)T)4mO$7KllO-k$+fp5kX&LB|;FJ_nCJ4|ehpil} z8>W^xq2h3mIQAH0!UT(Jb}?;bF1)7PH#M`O@wugAP1iq&@^n|_U@ktoC1<}H4A&Re9tv@k=WCM$Jk+og27h{c7RX5#0)4;o? zFI>gxU=Wyed7xH;0qqI&3}(yL*045B?;MH)Fo>oqp5_TE?n|`isLAINM^-iv&UK@l zMb*uspheJyewXP~ic9>;Sfz6n@?_eGo z1nYg0^?MUue1dPmqriMa39!XGZI<^UZffdB&4%ZKEZa|!s?2ex4$63GDNq8+jj)7> zn;@BuTMlsJ^^JjTej)Pp{17CfrrT6)x0ByV6ix)Uyf=(6g#-+14YKtO$;jIcvQ zmb7`q9Ra!;$%z0HWNVSpRr#ky-ST(|yz26&Hz+Ah)sZcbq9qd7zt?{}UI4FprMUze z3yLNREkW)s!A7L*eI@W3EOrW}&lHBi;W!7Hi)_ZH1qOPa55c`-A|7wSSKRZv?bzTo zLSlr$iWJx&YftVfboG=hQvaqbR=~_6Dp#fjE4d~G%3uK z^?yfr2@$s-qR-!Acn$SCqMm9t5LJ%KFbKdEgK;Jp7Y!eAM@L;jLW(^#%-k{C`$%@i5L*IP*nptSBomd&jQKn{8PEZW7w15(@>|oo@*Z+$)s7J-; z)3e+dJHLZtd7JL@>RC3mv-T{DLEL|MUhR=)04D>-NVw4o;m}D5Cv-6@iIIpPVRh+2exsu6UFqfAr2q^AyJ8?hnJ+I~GLy1vZS~ zI5(xcDK+j6StPG^+wh=~U9>~m8k%8+vtjA2m^Az~S%!%kQ-z(3qopQ9TOgX-1jJRe zELyH0+4Mq_t{EU+Eig@+tSm1K_*^1QHHE7?@EuL@i~~cUA9aw@`>+V9mxHm!SOnlH zJf6@^l&3{mbs2dt9@L1c6Q6gT+_wnz<6*FWO7nh4B@wvprm(ZctVbwa^Qxd}pxD;E zW0f9Fs@gCy?1VqMevkDK3_%wO0*l}R%v8R?E4gK)UP*_WA>YDn(xceqHQh2kkJio8 zI;jYv6t2KcYLL5Q1=#`%9x&ackPJNNBkDK** zy~LJUJRfqfY|j+jD7)?K>=yg>2E8|L2K%qNhv#ql`>%TJRqw2OF*rOqJLsKV9KY-x zvpqJ@Tp^%8c|H`0q1zjgZyjePDDkRfNv#V*A80RL=T@>D_78gOw0FkdblKo!zu!Aw z{|bcAgXG3!?0|Ivh6tFBR{xoXO4~GUul}wbAnKi;3>XZ(81_N;pcpm=eRW|P@250K4j|RQ7uRgl#@H`TrI>Wm_b|bZr#8>*aU;U3M zQphE&clzRQzxr3hN(wJm!-AI^jHlZM?M`U+y}% z);1)J4Gg-HK_SGo4%YvU^U71=!Rz(^!&1P+JmA11n6${F%ov!Re|OlA%?6b=p9vwgr~r&1Xo0V5iV!VZFoITT4I z#|2pG7qCGScx<*I$oqh)@)!etUgLCAu&(R>m=boFiUCX3#tuq>fK)+8M88PVAp|#1 z5zmp+EZVeg+DHt^swxW%H-XANsm}|hdwGalyzZ^<2Hsz2D-2`w2*ZuYH zKm1d{pNC&sE#!x9R(R#F*fd>b@Oq|M_GYGrvI+8k&@6dPT-Au`3|Xk&J9`B*-_%x` z{Ugf#lUW>fcxQnYz%oI~x(OoN&7}eUQx{P}h!qgHCer00li@Lu`@Bv?Xj6|?r)m8? z9VdNB7bi*wewTLh5_dwlF2;hHz&k0As`eqSAzq6re^xj6H&m5>_Jw`LXzW8; z17;yhHrM;2WG79yjIgB?l11+!|EvmzuuGaifC*(lM+`}^HS2Bg)< zw_9Wa*HgmB8^Ye@jmK8GMqtJCBRsM~8*`WCxGsl9Tw7KotC9OO<6#z0YNq-;$2%PH zFJ|iche$4T_tH5wjS7+Ulv^=c>>&lZ`}#LVb)zR{m9gWWrX63*!4E^-UUlk2Uk~)S zfdA=VSRtqmf1-?@G#k8nnKsYi6ldD7l_m1S=$wG*$yg)<=xWzUO#A?jP{7Pc__0ES z6^S4l=D5<~zb2B`$Aowe2R0_8h-UMO7u&8n(?!Io$`PTO5c!ogHK`*^{&mCiHG!yB zX2x@1>iO0%xXZX*ZWwILMMMCc!c`ovLvBa_s&G{yRG^hm<3{r`6<)DXwTysn09$LX z7VzYjo_U_*R&JTDro+KbRFNh{M^0y$;hPGhkPyedgog-cM4T5rrcDCf`sh*Dx?RHc z7_jye6l)6aKWgxvV_&tYS65R1DrmyO5R*3*1>S4yo&yQ)*pa2oYWsw>48dgm-6%`7 zK#!YtuQz^e=m_?N-(3HZ7&@O|Hvpmi2V&bo)hn{Cfcblg;W-3YTY=jK@`xkr(=j z1$CS?bnLCE%4c^9jOsxtjLI0OyU;~`aG0A$*j+^MsC1s)CwBQRA9L-goPqy8b$Tp3 zL=KM{$jw7;OL@Sc)_;U8pEH}OpL#u~zs#F~9?^b zOp@bz>G(tmZ*xHxPa5e~_jg;=Xiw`RABbVLN#KxP-y&|LxGrXaF=CS^Aqwq$FR`89 zZt>!A&s~(0b*t&ov-SH9{1Y3pq6L7pJi0MRht16g?RJm!=X+{vuzp_`r|3u9q3XYE zX>Kju1fjJ9*{uEp|A{BYfja&2s7F3VOa&qsF1@+e(ZIM%&N>zk5N&ezH%?@~JWZwc zSh5wLiv{QqinlPQ$;^~4F3gaNW|&4+u)8l1cU(Pu>3k73xU`e(yR=zQJiWkrho3!D*O zRN>Sdj#9G17->ZTiYCFf)YYkGtSr>hVR2J^d^$M%NlVA8){p4mWbpdrxO@FyyX-~x zU6&nqFJ7O#?V^2CrR6}S9AWs54W^(-LbXVzd~Y9Azu2H9@RMS1FpZoKvTjMs*;e4; zxhvVZjSZp1(LCZACpd#c$5P{&s>@?l!PV>I@qyI!dI$Y%btfsOl>B6#$(dZ0K3T+a zVmB$9JT?7E{S{MlZi;#HlzO|j-#F4aHb)w#h&99!#wzoe&A|GA{g%!s$(b5*S`1a_ z6!|6^+It)!&rQ9Y6xh-FJyu}j2Iyd{lLr~Xwe&it=W|z>91-mPl&|^jQRBEwa;}FmfS{5YKJDdSCQaEnK48Ty$<}7DmRf4YymTZD) zMVRhwS9Z?_aii?G_(vj{f~4t?@IgWj2~2a032S41{DWTW@3M5N2igoH`W4-)-BM#V zwwlkGVPAWs-aTpDnO%$+GI)w3Vf;rTryZ=-=bWmr8*r~ml@CsdV`%9I; z+ZndZxUKOQAQ=|m8lwWRJ%x6t*hZnY&L5>y_Toj(|4BFbZ<+qQkKsyi#)w zkFwgUp-ys?jW1l3`R?>oT_+=LvXi#Ui4!N)X9|@woI!-Hmg~x9GGk+bP+T~=J#)lv zlKrvg)bo6}d=&TtOS8U2RY}XpNYMgGVD)mZIdR5^B54=8@CD@pvx4!x4e@aEN6xEn1vt zMDSJ|@I8DSui5b0k~j{v<7MiTkIW`pYA-ymkk{gVmtRD{!CrAf@syxa8^A-!TT#TA z^hDI)K1*E*OB`GJ6$jC?HG1Xdh0As(1uR7LL|I^ymt*&#|&Z(jfh^(=! z9t1om*zyGVV0lg`whS_6*9c84MEHrsRD49)gAP}wx-l$5jm*0$+ z6%FP7%`_RKED)1--0^8aAtw_y!3USuYg>??Mil;F{hmx3s@fnZ-u5W84SGy9>Jgty zi2i&JreytxI}4811btcK)0Zj^TMWQAmc2|o6a*%4UhSj;@f?V>vY}F1AcWSi@Rn)v zz4#ty!k&e*{>fP%0SkZqQ}645s=kn{?_0EwV>%6!B>iFc%$;*C^ecbDxH_3+zPYZK} z*twZvz*cbrE{DbJSv+KC@||b_dnYK4mT$km{^w)%B??Eft^e>c$$?k@g)cycz_e|2&Eaxge2(9Fj#4+lSC-HXAXclN6L z<^_CXz>Y6ozv*?)Ui8j-{ry*iMi%qum@H)FS=%T4AN%;{XZt@P&Wtz@!2QNjy;`ru zHm(|HecwFkHx{^W7Wf@(;2!Ih3*2vSeZ7{+xwOscJmCvWNpsWZoyy!gYzEHQWx5h< z#m67M#h@8gV}Jsj%aW84BWfy`fGyDMb$EA3hSzep2B~Oi$fV3v zyJ=2_viG|vt(#mR<{v3?jD-_#?&aXgT53l{>3#e2!E4;NgYz#@8{-Ck+ui>;yksfg zerx;J+lo4p9drlgUinRbfJB$o#WL7q7sm(o#ZulxZOQ0O0XaIDv`n9&Dn18Qd<=Ky z@x?xBQ+ToWhVA!`&(BWS{>jk=C?n8KXZ`)&*?t#G*v=Ya`cSmqbdlRZ0j`PeJ}n&y zyMK=NhLLvt+b+BWZ(L}3dnhjmjF&BZ^c4sUt50IIA+fh}>E5pI`Uqq=hHK+R@7?Li z_3zGl2ghImy&asc|M|^v@8yY>_Tq@W=pJ<6?!UvA^w8mR$K5x*>%V;X%|0yq2|IZ) zc-K9-nzd1NNdHd!5t2QRYBj=+NbJ|Zt1=-Fi09C}F++w8d6}0z=KDC7v z=wU$jum=7*rEu9acbXeNV(?zc1U_|Ltb*qy%_*{!2J1gk4G-l}OvxM($(eApT~8M} zH*5>J6tCsqjwy5`E)}BGu4JV7;NXVFM5l_!7%(GXz>F`6CVLEK%O()O-HjZtv7Z_X z+WJ4%|AvtfGaT52og!a=zN-oPq&y!@5uXQgjmz(7V(7_+E=l(g&g<@U+n%{e+-0?2 zI&7M)zfYDdUGX@-6zux~SzRugIdBls031y>qBF zqH>rUsv#dIoaa1O9Y2FWF%5FdH8pcptd|zeKCci9V!A=e4SarifvDNgq1gk<#Pjmj zatrnaP7Do$xy-;hu6T!}CM=McZBvb; z08`Napxrxc!C(uPENS`O_rX zRA6U{d^8vZs5tOn27lGq$o?=~ZEo-Z_{?~DH%T~eI;0QV*Ix7KZaurwEh`D~jj3fK zcQ0#ibn!!aH&6}sel6i8lpyl8374ew3WR9pyGHVyX3f{mRn&iocZ#6t3 z`2>u|5T**>vsfg9#z}ZM)WTz>UA;ABo(_3_69-*m&4=LZ(P(k;L!PXE7{a$F5-k3^ zZ~j|FZqucyxpyN=RO5yu?Df&bqdga|R2D9;h+;)KwbIR~42K9@LEw!mhF~BVXAG+; zn4#n#f>^ZsndtyQ6a!y@9?(yc&XdRrqcUcpgHsmUoZDY!LkFars} z*jA~Y6U0O5P6RwgLDx2i13zWVDkNq$f|oE8P8FjrfVp()bzkHz1u5R zZN2`u)RYrxmb|y&9kv~%G$BUR)r)#JEx}GBo>8drrba1`i&T29D%enMoEGjCoFdq+ zQT4)2{?`4SN6)_221zPotg0W8F^qBLqinj>`VNykE7_B=Hbc}f`Ayg0e#IT0BPlg@ zw3H`U_^LK^VYo$#hd4N1&3apot>g)P+m7RfLAxCZeIs*)XBT{P=`uH^##5N!3x1xnOFrFbcl)heZaMQ}1s@_SDpCeYIy&J3i-gq-6lV~TQO+&bFnf+I z)Wa@L&MA*=;SDFGiMk*)gU&WJ)V+lU7sTN<;#ddE*6)k336%jQLv>qD>M=nW&lg2g z(dsCvtQ4iBKW~V^+)vSZjlNie@9k@D9nec9x6Zz!o9GYW?Bg7x>8rrne2 z2oX^H;O0Wchujig9sgQY8Rjw#IdsWo;-Gvg#R8}D#bI`9Vq_y`Q9`m+OTv+P< zKBC_XtY#OnnguWwUv-eF>)##gvWQ>w2Iu{krZVDVctk63O|QtR&s<}CFym3gx-jYI zC0^*`LHGP8`fn^M6|4(aXkD<{5Pfnge)h@FTupjn4`$b+aakBDpkD(Dgy`l|WNT?h~*eXi5)F^mL|Ih*bGm~*Hb@eHX5A8tAe(_wVkm&QN z3_w$_nvT>xkd$6`ty>^poV6-pDQ`f|pvU38t!bkXTHJH`K1 z+>1*Je25YyKug)Q41jnX+jZ8*f|$#nJK>zApzJTA(XGff^c z2cjB>@J~%PeGeKWK{m?n#g5QioB^I;adS(pfL4ZG-d7|Nb1Fh`2$5*=VrH?yj+S-sorF0W}Z z?q!Dd?|>_P*|}sN-&{y->n?x_4ck^teyW)zZLy8#J6(S}uu7zh;dXVCzwZqO-TvU> zx^p!gbbtMz`=xzI>+{CV=+tU{f6MglZ7uECFY4fCYO686@813LzW?)3P40V{Bq;a6 zKg|;Hvcze;Y(2v*6N?IP>NaGfh{ygZ*^FeWn%Xh@Sau@%9$%_=QPXg&kVr_LfOMy( zlsAON;tl)rQLk1dGIQ?Q0xb^33#fvHbQEIZCx={50-~Z^qpdk#fVBexP6$mYgg#@* zBgblTd_vcjAF*ri%C@9_>Y><@I&XZKtKiF(oU%`R1_5|U@ehwDDX6B=NBM-V31DNh z2GhsrLLo*lyzY`%K&Q_#+HsfNUK#vk)-l=15AK8^?9RW!nU$MEQ!*PX7>rc*@z%1@ zs4J+%jI%XikRwPdrgV++pIW-xT3pN33bvgp&<-nFv-+M&SvrYnI(Sjr zUed0y6Gw3v%L}YnrOc9a1eF?iw8tU?A{zZd@G&RwSG(wR!zwiMhEH&xm$9c>H9KX` z;!%15;e}^_OptUB)JqgGRhmelMwiWH5D>5}#7Uu~L&TUI2JJVFqQR>77;N%{nvOmT zBe2lK{~$;8Z`s*&J>P~(HiR^cV8#BCB%27Jv_Be2-;jR}qj?14SAS}?EZ{g5>7ZD8 zlo$WQ@4e+B4CMrJzR7Iylm=nzzW-4{3s$(mmT9C#t^`Gmbwt$Ui3;YQ@MmmA z>`M$C|CVxyEhFza)8j030fSy{2EmPdQUA7oM(J-#iRWrY>fQ%8!k=z?YWm-G2kMcF z>rBVUnP!nwl)n0%eSFjPGzwqxweY0JTgRK+)%@R>>Ool~2P#pZ@*Yl3(7dZS#bys? zb^*N~BrqY?CH&fvJ9|tU;=B*>&({_MRf%OiE%65}SrP7R{lKg4M|H}me>#S^tajfE z&dTCN1?Rf8vnC1H`=xI&ZC+8Hf3u%mt8uW(nJG-`0%ry)D4~YE9Z?26H`T5M$)EfS z1RH~~-%=h%nQ8F&v$}W+Lt;2S`mbNo*Kc8?;UhiM%I#4h$=7cSZ>b)P&c<^XDkz{@ zXyZqox3pt78Rpt`dwE+5 ze565wivLMTpaQ3jfwR0w4JKkDLR!>wqg!qemSrwR$#`@rCUNu`6j)H0C61PJ_y`+= zskW0B+}7wT9TCqNX5!`&gXaeXZ91RATweRPq^lpXQqlL_bV|VYy$2Q4uZE0ydMD)T^4I)g(#U z7&N0?nQfE^2TWSG5u|{)m3boO8_l?H=h28d<%NDuZ;A_B_w;eCNM@6?MtZiq6Mi6V zZnFh_Mp#}T;ftm`fI$Rf&kwJ1BMGJFGH=Mjv0%czlfLIpVI=Ud!(I615k4yL!)8LzjAdB^Q&|1GCrI6-kl8K+gyP|fnl-L zI|qj9OG}2UcWm`XHsrK2mbdyVJP@WY1@P=t2Key+7tZ4TjpvHuNtd0q+2Eqn>z=ev*dDv`xf{&j zdj*qlN*p;-hXr=mS^YSYmdDgPBcIz|ajwayjc@Alam}WFnw?bpCh&Z9;$akw4!v)| zK2<}_spXpB1R!I)~E<(J*oPnr< zAMhAnKs>qXZ;(Iw;!0*qeQ{;!G_QMeWw_})4?aJv*~Q#E2Oy!~c=M6(aYv%$f(^h) zPGg}8Mc@HGo<;DGPx}CS@UAU^x#d0Qv zwRAywS<3~?k4YwmViGRquo6O@n&LvxVJhtK%-MJvpHxrdAU~1ox#zf`rzLD2=uX$V z<@eu4Z0_0a!Wy$B54@Yzk8@PSjt5eCi7%JbE`BnUaJNAM@n1zSGK)F#4sS2qR4?)^ z+LxbWf?briG88^@^b|u0Qr8w}d<1%IdeFa$;GcK$j)UYa+ZO9ja&rxP@LAnG-Oo<) zoLl2DTnSJb>Dl?r=r%fAruFb(p z3?hfklpP6%GXmO|8hy?T>~FgaZqObcd^-#(o5X_6`gqH93(E4xFjK z|N388I~v~bi1BC$E6~f%Tz`U|Qfsj33T!S34=OFqgR;PQEp9!Z`{-3zdb1?!!k zTo11Lz4jR#)Lzbg!l};$DCgDxAh$u-5)Df+ox$xbZDlW=qI9=3v#P_3Gs}sqx5|43 zQ4crRhAVsjEl6h0=3rG#VB~Nh=K-iT3tqEfim&f~f7ya}gQc|;qt%a~0q*}`fp~}6 zdsEMP{~c%n0S21mijfDC1IZPk?1;*zncft9iu(>rKwd=2TQ*^XRR@R;@*#ld(vz+Y zau~F5BC4eWwsB4c>Gwvi$GGEoEVz>njvGZIIu&4p3d?i7DSBSy4wp6`erXRJFJZIf z2LqDydI4}f;A{5O_ZWAp8m`9)3<^h)8KVtaj6yvig>X$UUVw{2FGT46O1R{!Kav>m zSMSsTx1Zo!@KvjKYGH(0nAJPsmIFi^EzZtt*>*-p!`y>KqUJ@iN}KC=1HrFm{zt|% z_sJ@si!i(qbK&0v!_^OCdL8D>ePy(ti5uYtQyGH((#9ZYKOf8;Yb;<&_{>NI@E_1? zz6;-giqU|xvO|^jb`HkrBv_K$xwO4l(xC(dSqE#h969g5x5bnc)|^|n(vV&=V5~Ns zWuFh>>@B%3Qr#WG4F+kJwe8T@6YTPJ&(I7F1cxf67+%w7g{v9_aLbfwlFBTKXT!}! z6V_h6n+dQ{T}y@hpQtb9g3_RWgmc0s89bhJY{woyX}QCxWdMElgF2 z3m$!oxt_ac3;bPyzl%bk&WY_m7)vj4XG z`m6ijpJC|ejr45r?NHS(7og*_RzjF~v=Ud%_m*?-)|+E|247f;=_0uQR=t2G?j^hg z5^pK)e+%9v$P$c5l|&|-A@TFw4dKGwuv9X{&Rvjqpw+U+0d&-*F|*dhepj2d7{NHK zjbLQ_(-sr(Ba|8o(6|jE7KLC8EmK=Q$%-xTuN!{LDUKY!tpdcJvW$a7nNw6mMcQF) zr|H*$a)EX6+#s5InQTNUa+1-atwc-giJ=}YU8GEaI?M)IWE`;GL#x*f(4K>{3R)(} zSR1A-@%_rp*e{wCr`;=dF)K@s%AyV;fQQaDQtSAt7V;_n9;pcz1+J!=e#Kk#|To_714UzFW?C2L{t`#M{~ zI$i{q#!(+IuKqu%c8l<4l+53$XRrQ*_OAT|j+QTGz6=CN?&wbN=o{1zb++EQ02e7^ z@7+X!M7fV&2_}>2`@a8qoqW&byUx08@MH%sdzTqc76tp6;I{CCv79j%HZlO~&+}ug zA8Hu^LY|{T6ml@yDDSDSH26qxAovYMJ#{PyueiG5>IHts-6ao8=6>70L%oYxGdCJ? z3oK%kmw1cy&6H#~8nqqvKdZAHj(#q62yRdMkt&ZddDnOn=IEe0L4KS+JbjqL>c?A9 z*3+3ICP6%z(4X`BZ}HzF$GZW)AO9IF5c_~R8Sf?#J{iXhO-d#7;g9=2_@3>}WiavP z;3scXv;d+%W&4#P9`2}jYIv_*2?|V2EJ6<4A85W(OP?-)O zKtc3)WESX|=#W}+S~M)@27T=!aEDDJS|h_tw7Gh1xzw^^{!+Ue`q}n7{blg<8<|}FZFD;6i{^ zoUV-WZpJ=|j=qPI^hUtF@Ifv(Vx)vYGPaMXOz$ClI5tULHcp^ALf0$>f;`jrB4zX$;Bxt1>!b_^qcaysMS12N2Ag$nem zQ+fymg;nr|;MuuL$#q_wW)2^r$l@Q1bA3GkxzzFYhG?vq_h%Xk{q{)Qd5#YCQV2$> ztwqeE2S@rznz^D}R^Y+LVl)^Y0Q(hBz*H=kI`2ZTNUE0_Z#7z|rROn^A_!JI%lVdx8d8Z8JHG)jzG zB609S1h6dn1+Z&DSFHZ;iMQ9w&d}%@8Gt%HtB!^4=Odvt8Spa+raNDbD9RURLy2i@ zKjr zdv*1T;#y_Cflg6JeO|3V_UFv!)0!d5uH7W6lmqH!Q|%&`YWGOqfKZB`RkA9O7GkLc z;dUPg4jaUd84yL!suwATBdV!E7@$C~IXQ=fQ4Z+#%&>}Nr1Dyh1aTFr+7qsxiUBwp zn2!)`a4BU3FT=Ge=cX~3FSPL4!d*K$tcFrOYp?)%3Urak-ST1kTDP8|sPj75KLZC&>FtK{!~5v6mw`ky|C zT69ib5n*VZlJXNF;p%8zgKCP2ZS9a_qtAM=lON!`qXV3;IBq@gp zmVtCjR*C`Fw8^T@5grxF_+o4T6}4Gj8k4ipr*U;4O{IQ{3$sRb zp8Y#;&C#8>%1X`v`34)Cyh))@AHvw4E0lu4CP{G2-7 z@Ww5eCf?0r&hZ+o=dKvY{x(^Iwe)JiyrlIQfEEguaInFi2h}wOYo0Wf`Uw504BdcU z4wS9?eD#wLw$6G>V!(QDvLw~V96i|8J2=ST!LI)5Q??*uv&H_aHoCxp1)~c*SkfeU zI3{M1F*PbfZvh%$y*1bol|c2?$cXbriy>S}71k}&`iq59t@=(kx$Ficc#|2`B?e$r zG$@&sUb3a5#UgdS8cxSV#a4sqG`qLi+AP0S4*&W(tcOzsrweO$esmg+{`Tk9@AQ*Z z#eCv#f8EqpF>rUAktPhlV|FE&k_1^1G5ZNd(~vFUR*@%&he5B!*{(~Sup@qex_ccL zW~xwYi_Ibg{V`!DxxO-vugd!~aG#rP+FK4v60ziHM$Z|V)9=4)F&DvQxIK$o_x*R; zh<^V)?&>7;7htEn|8CQ*G5|+!QiAxQBt*WR@YU<6iZbFvc#-LzNFgDfN_Hvyhvy+`g}>qY*Jxj;HEYaIMwE1tUF@su!Wiht7g&S zOQIT01kTRtrmN5`zM>+5Eh!Qvh%3Zz7n(32{kZer=xls#a(A?S|*Bg8&+GLM*_5?s*X~%07xHUN$(-j zoVALq4CLyY(n6JzG^7FWzNrC%qENI@rAWb}SfLC^_5G@C+AsNw;bIy&C}^d~q&DKD zvbyPPzT~%J>WW(=|CmW%r$6i^81+j@9vc2@Q3?qlM)DPWsUz%>@uJ_R7YxzKTVA?y+2f zbWEF?DN9;t2amvZ>ZPYwy^b+}-p&l$pN9fuJ9e%CKYtAx7*Dag2~GCq<;mV;MfPfK zA{YZVs99me7H*KPl_B4dih%918~mXPh@H`4^;EtUjyeeY(2EoT!j-Dv8?HLq(JFu* zQB@uya`COp*Iitj83#ud_EW+)%(x;Aff-F#?`)}j#3zxPVZ~wrxQP}FAHKI7BX2~I zWk6{(1c7g@dc24RFI(mhW5Ep=s!cgT2I1-le}aKZOxvbW)nLLgJ_qlH1huRP!9J1= z#DFks5@G(3%ifF2{lJz(3Z!)d%m$M3P4J`DA;z;fhfMx5!iYW8OGI#g=Q$A#druYM zrpapLdGL_{MazO1`l(1!LfSWqc_ACh>K!4B%S9fWAV&?LIhrh5T_Q*?8v$+gqInE0 zM1{YO!PgknmX))_VlbFvfNz*w7X$jRnatZ|Agdtb))Rpy@=%R5WS>$KAhnmvY?30w ziAX$Qcx03^R264D1b->6Gs9jJULbXS9aYdhQu*yDaZy*95H~|mg&8g8$mvXCwMD|7 z)Ff@Ovxt6{fxFto#|Zlk4+zizB45V}mva$+BvXli3NTxSui!}{LjmL>tw}w%_eK$a z0}0D!xt=owC*cv6I?T!CRG*2SWPkg!Qj>rCD;xQ%|Hl#u4Mm}H(P-_X0oB-SWiCNdDZ@3hILf&ha|EVh zf-B8#Ez-(a1dV+C5u%|)@byQGK6{6Tg^D92^|SP4rq{=S%T!}J<%Uwoo1-oT1S zXYB?qCl(fowA2b&j+ZbL6mtPc%2<&-QjhH{Vv%F^7ewJNt#IyvN#cn6Kd3)84_NF7 z7E|MrqBYgaz$n~0_R)2N$}KD ztG^5@ND~{LLIZ|Kvx=PuE8AAs^7r3iTbqfeGQ#k|k~VH$sI{ZhT$}IB9MhyKr-E+uL#NpUuV_Q~Q-)N* zE*XVyie@L)v9{Pb%HF;9&3N^LJ5n!_$(fzjwDeUDDAj1HBGAPJ*#r`00xQ;tbayM< z3f%x-DGTy4pRNHrp>dKIQ}BgbPN`b7pO>&h@P5hbT_?wD;GQ-u#LMf^mJ&~%hiwtw ze{aLYWDb6sjRs%@jX;W?H3t9RCBnoc9>Db<;e7R@Z!IetS?#)pZilOyaZp=ny`l?u z9pRsv$t~_y7jSCj>ff8}+m3V?6>6ZjS(eraZE-tC*tKXE5)qJ+)R6%&+6~SqX3ChMr zumrh@TWn@hJ0&U*X+wo4h3>;dZ8i<<)3Zen*@F)it&!+4f}_yy1k10HRiw zbG(&ld7G>SPooSHs~o+#^*Gwf9W6t%O4e66;#kBefz!%JkC@|SNq462jjKsY8_j!1 zW#)DH>Oq{1rpiUEKUFMgBcTehbe%csNJp19ilyc)jZ@vIK5_FsiDl*?x8VqtJg^7r z%njrun{0kH(xI&+xkel-S`FCl0xeRO8YZr=s^Ex;NyCw01MCR;=cCTWj&KasVnJ{0 zoGqtFcdgpfPO0}SNQAi*mhQT#>(xoTwqnPDWUbU>Vak;+a9fnaaE}Kz?9kWFAb>Mi z#gxxCgj?Vt2d<>HtwZT}tVrEiUI|@XuwLDb&9rT_EZSHIe7^r_ zHHiNR)Wd#eT+~k8=VTuBQ>6WNm#i7j^dLkiMS`Ee?9D`z(Gj7rf)-Mi5t|siig~6* z(pmUw^+Om_KNctTAkVZ38p1|(A{kU9cYPt3A~T~ATId0cUO74NHsZzO%I`N4TQ*cq ztEM!>#H^|hd!cYWgM6U|&zfm-spmEV!+g1?iF(j7r7nty~0t%3T;7ftTS?9hFG&D=mhUP0AOIqY?_eAS^F!dhu8n)Wk(?M!AYQkz$}O zrXVKLaRPROjh!(Ahm0lPSc+;Dd!6TKF!9~Ezz61E+V2ljXKp^r#vOX7`wR{mOD}vx zLjz-Lh|*-M8cF`}(XX zj*@nH5_A7a7d^|1Trwn{Xp3`%V)zLg%K+6OLKNXx6aQ(8aUbsXYz&rN;LXMh{Rv0X zXH<=iH^=)3Cf=Ce#T}W1O|7lObcQ|O7wL~D_kG*Swjp$@ixMja+-Fd2AsF_)4KsWU z#>y;8YvZM8Z13;HjQ&{-_stixVPx*1s=bXn#7whYlz6*6vk^naWxB^XObrwY+SBHi*A;di#!u0 zM`I*=1B+cLt4?EW{$%s{HmlDtJl~2Mi#2L63>@M{eVQoX2{Z1zY-2m#7u^BOIEllN z9o=Hxto`2C$C7om@U@hx>hpP0pZQ4s+|8v*&74cMsa>!)=E9F9rcZtc_XT!)&;k%N zJKSjpr>F#@dZ=E18{fcYBxsF@%W?x;BbLj zlsf!^F$=P55beDEJ!n)bUTJ-5vD9-}8w6{>iWb6vsdG?ec%uX_7~Usor~)B}Gsjy@x0lnzb520(3#82UmB%oiswy9F)F6FZ^ z+_C-HR)?{Vigs!yzK>~)Io`-U`jOfRZC}z?`+J{${8`>+)E*sNifb3X5Y&|T29_6h zBe94QXOH+NY;}v2Znivk2ycdnUAe=6jkbNVh!OYMJ_JUaEG{`JqP1 zcU*f++5&84rUotrE(acA-xZ z;~(*Ywh-Jg(Oh-0k+yZGxmh2B5BCfJT@&vsgTFE@928YUoT?}rXzO(&FfrvOeK#sq z?O;l2n-`RKXj6Thl4u}yuhwiJVj}&}wD{|TgfUn3VODf<)C8aZl+(6w&=>fcLf{Q=J%||o-L?`D=@wj zHWg_YJv>~+w3JJT(C}qFjaRl$SOqbO5(fQ7eb9N z^O*#59HzqX`4r5*5Ji!d0Nx4xa3`z6$PYr}O*l*Im$iSWL_P1pKdQ9d1R%BA%6eAJ zoDIUr&R{{u;C_T0)B95~CFCiX5^hK_rlc{bK6c7lq?8DPQ?29;!fZNRAzCtVun@zS z*di=$r$m6kD|;o zQeZLldwAk})d7@-3oplG&>J!1JBR>pL(gqqeYTdK0|iHl?y7g%`^Rh4b{!8(LmW-= z^o|%TuvF7e+HySYbo-lWJp+8{%|PT=-BT2=JX{k*Zh&*4nQ_;3Ef4MsHDP{|vF>Jy z=`wH9fte=**b*uewLI75G6>MIs+P1h2A&BDvPSWT?f1?~XttWfC3ZIN9WW>Kc{CmJ zaXH9bF>e#qS&eLjzCgKcsiUkcgCwQps1q&V*ito}rJyIc-`ZZNXleGKyYqP1I*HrGRv>m)RZMQskOdWKTYpX84l2iIRd+AtzaGQ}j@Lxx0Z5GyQ2 zmz(Jj3md6B!?GmOPn6=+c_^U^Mm`+swL#%TL3DrI*+y?z6Et7Vh!D5hUu1yoIhqh6 z?zHP2++CH?QiM&&ux-hQdIznQ7u)4jL?m+jMK45PjaNSm!$8e%GiJw~F%NC`5M~Ie z*5Vv8U=lV<=Iy|IIE4X@@Ni(~N)Siv6zxo++{VxT63O=AYr~3eVUI}k?d;&}#^B7e zR+UuT14qb&$0;A}6xAf+*C}m`Jy4X{i9FnCkSc4a#nz`xKBLV~Q2Fb|s|=WD zja)=D&p=5Le^Rb9eWss_^Mw1pw5+jIJmg^2NW6|QT@0$Vz!L6qtyIA7yx>6PIQ*i8 zmEAk-3orFsmLDODDzOg2>@11QLSebD-GjM~U76UYp4u0*uOopC+xZOMi%|Szxa2v@ z@Wg^`tn5kUbA%%9Y%kqAq^cZo+pE9|Hla4yc-qZkOdSQ{Fb z8O2S(A!BrOf*a$EM zbgBKs6f7ZoxlnvS-W*;d+z--IzyIeoqA>7BGh>kE4MaAmvd)rXFuSQZtjs5gkmXI_OQ@2AcmS9 zt6jCC%pIc2usC|mptdt@ED(ojbD?X zI>)ZgnW%cGqUzONO|>95;nb{=H=t}tCO)ncd-3Gn#? zYh!Me&#>)amBmobY@0Af8zKsi1_$Z9 zef{d7fR#jH)DLXbFIhY?VAJ|Z|`pxx_dRnyJ|FHd@>rx##P^}FosNhVIx zW$o9OXT5fx+8kfD`&Zpg@3Q^2cX3{7H3vSC;C7+>0rmYHQC>s^L(QM%tRkamfMEHk zx{~3jm}Tt?JJQ9G8EzUR(m}A+Falo&haaQ<6%LF=iegQvN2)^Qvzem@6~u%!GKD3= z_hai^ITVENgl}=M@HW6FEks(zWnw}z;CQYsNi7t+6xX?MZ8wFD9UpyiRI_jem}0Me za>>rPKN9ScBPTDSgxb=#{E9}-#z(pTN$n`}armnf9MsU~<-R872UWjx&x(T@RP&!2 z9@EiLWoG@bNd4H2sg$!Wn>J_gs{+ZVCAipo4*H@hl9{n|jNolI3 zf~t$ptVfQP27#q96hf6$uxo}9ZEC3FRU7W!>A2dw<(X<2hSL{jB*xJfAw$9&^5B0t_ zAQ3*S{|A?SI6=Mh7c2wBZJ(d8H@%bYIlhj%zlXa23Ut3w)oL|Ks5}% z#>K&59i4c=RlXWyV{x2?43r%_Lu4k*EKqzCL4j*e#X2v+wB-6# z=f&wfqHg`^S+{p(zT~cwmhI%jL3&6POr%aejXv1ZT&g}zwQ0^ zv(=CDy5M%w%(W~c9w8VFbYp5dO$tkaG*l&dBcGZkxp9-w&?~3f0tB@3GD7qb^E8nf zYj8WamR;P^W!=@X7;b&KEzz}kPt<09!nON^GDaYXK>dJ?AEJ%VHC{(G4b!wvC8z(9 zu7ZJlUC^^v(qyG;cx5-t$xS;_UDt;yP>4SG*XhS-i51o zfBWm&-jIQ6jHbgrX`isKKkAFn3v}w7xYTTJfBn(=Ufkx(-BfL4sz_kuTnl^<81OrJ zp%Iseluet*cnllQuz{a?eR=U_@V47|dERTE_qr@eTV34$zSCnp_O^Z5eWTxm2mA0p z5Te(ex81?jd9QQzrft-BHPFHx5n94inw-q ztA8;Za^20&UClioYkndc3+`b!j{&f>1B)=SZn3rc4Y&CmWE1@JDGc*t7V1qafi6Nb z-)UBGHCc=oAfFtEk4DrGS?c45|JL|`4bQC!oIrzVd>O$sgir9)>|jv3B$ON*PQ`?* z{NNS5>|b2AUtquZCz+f__lm84ch>H|=;iOA@vO&A=J?BQ|9Kzmmh1Bu?3WkkAZp$! zWCsK3!`jz_H~LL9xSn^fy1ldB3;5gUOg9&(4Q=Evg$?GK)IG$Nq3p)aGy&KGDc=7x zn!FQGh#4EHboU-?QX0 z3PWGP;mMbU=>;DK%LjN84SBY^i#dFCMNrXyf3sNX;@FvY|E+)%D*pcKe_K1~V4_B( zf%mrc;)X4`Zw3H`O_pazApI>Zl7W23hY$at^YTr%e|dfW>WY00mQn!QuIzmWS=HD} z;qxdMdb-)Va32wY+nKW-`==sxQ17UHGFLUXDU%0*MU}rueIBNQ98s;9Xyzj4nr_82 zHDEZdvoNc_P>nRM%?Zl@<0`EF0?WWL!{K^rozRQ7_?F;j zo4ZL3S4PZNKa71xfLnm17vRr=iJhTNaghTf@O+}gn97j3=tCq?@a#`OS2O-rUY-O=Fzh8~5f)^C}nL zHU~L=ll1x96^(V}h!OC4SOkI-{>#d>?x5~L3u06Kxw;|kA61tr92G0NPPs+NZ$#vT z2!BhD7lTr!zDinXz=KI?ga`k|acQA7elTzBzN7+{9=u%H3B#C+x zX$n8ndac=vs#EneLYNLQg%mknkfL|=s7aZ~BQ}^+tw|4KT>S|=v4~}96IXx4A%pGy zm?@cXCmT%ZLpi3m(Fn%GmJ#dAKu#1f&r*`X`Iu>$(1#7CRR?dY9pd^&&Hz5Q+6pQV zjlCA(*t2z$(Vc*98?5h#ytfmDKLgK}W!S1X^B2p-q#Rdju-+ScT*pWIpH;3mmZjtx z>UBF8sy`SOISabAb(Cr|J?Q{kZ3-yAY2SUi8IK_%W}k9O;&@#9xkWW>wEvv|R#k7|yxsw(}40 zx8-3D(AkrUr3}_UOmBp33;Q3aZ|>e~ya*&V1x%XVaNG73eI5flXzJj>MUrQs_;DAN z?Q-DPBiGW#2R7Emm&Ou7+jm?#$y2!=);M5rlZW_gNt;dhsUVty8x$_Expl7CPQQ;s z5%@j+ova|8HZCt}E%!piC(~h5fWM_GiwZ3#0}Gq=QRp18oqRgg{PQxYQ9wXE*4YI? z+!k9~&CeHNwl7&~bxnoJZb9RUTKI&Kt{FX(#)S{u;A4 zy0*i4p>^kFuz(xMF@zrP+4fIorCe1#iSvMXDtZx_8)l8YUEH7j&+14`3SB)f;Al@X zeS`2K=$f3`dc%gFF*~8k;=+Rm1%fNS3E2o^ENZAo&y@_YMSt3Y#F>h~z~4c2;pq7$ z-(Dd^&hp5UP%76;T*`pXo zaSrN(re|_^T-OUNXGp57k0Cm4--7;xH6jn*wak3h2 zq^G-!>r(z8%lx~8hJ2MFOBLTOT6k1BRa0$Et!0$7%7s|79d4!=6vh~ATn`7f7JMsn z$OI&Ninl3T@rXwq4mmbKZ z?YA*-t$(of^wD7@MVb-V`#NoM;6K5th@6mAqq=~&(Ae$Hu5D^* z{H)Y4I2{(x%4Lp;9Um(Is#JyvRj+ zY7`sA#g>z04{z%ckx!~Ky?M%yiWb1CuP2SVi(SZZnmPX8r>EUA5o_a58Ri$O&qm|} z{>U$O3U`3@J=pbK(25-$SF{$7;7oYay}~x2pWw@zC?qa76q#9P6uZQGj)uacHGmJK z4UF-9q+cao5I4eZ*%1KH|_J>)lLvEtb50u2ru6D1!uKIc~taq5uE#8rTZ|lxYCG@_?DqI;(I`zCX^HHEsNsw0Rjt;S%Mu^f+5y!Mr zsu-1eAq89vk#H&Qr|&h&s2TG(Y>8{a|}FCtO^?>Q$R|FDmEpRlT624Eu8-7 zP+^*OVbs?MA2jMe%L|`r)GBsnDD($ zgs-$!!|UR>>G8e6#HoAQ?7^#q=f&MH^8tB26!62ots>)y*Ou|GVOydpsHTybWe33Bol&cmR zjNJs|&f92Xj2G)6#yqcK=_LIpl7bWyDj(SsY0Bvf)291@}+k5Qwv zI5{Cyq~|P{E9RN70yw&et=s;HYoG%M#f;+GS*Ps}y{4+|pm;)(WT=iRqM^{) z*MPQHsvb3s&_ma@@G+A`TtLfw9c4U{|BP6mG+94%153i)xSxbqSfA)l~3c z7xk`(l~A!mQXEwn5jq(|_?CRTfVm`ziHLx0kNBVx$$nVT=9!RCbj(xI|NM0%})eMV>1yy`p0;q`$skuf!tNp}ABG z82%K?2XQlf`0KFkLHvywcr8dEPl>1K;W9My_DJ0_6^!rKx$JW)H_@~7Tu}jbZS)Dl zrA2kwwL<HIM`Qzg!A2|$mz@!Z@{q9RaS6cf^p7$1%>OGMweVa-=>|3cR{G0tTe+wqjBpliru+30&WUI;7D z$N$FnQ30oD4|f2wtJ=@wkH^*6gcR=zI~w59TcXf!8>|mvF+#B-wdJY}odlVe=xZ`! z<2pA5-w)EzDs8O{&>Bz&$zhbDOb`G{wQSwet*MH9R;;8}krzQHpUPR))Z!pOOSM4^ zW&$b6;dIZ&Jdk9$qu~oL6(WBc2^OzflxHCE<{+Y-svja8nzogMjfq3c5`<}ELoT-- zUOTKTdQc``MYAe(Jo1x6VUh1Klx%s-hOC%29ZBiX_+faLfWH+vVB@p=v4Qe z*l2+OM<>xHHv&AG7?;`7q4G*^O_h-ACR#fv45>8p z4$FZeA|@CL-_MIy5S&=4L?PyQe!8NXJ9ON$SA939{h$UvThjY^!&+qoK(^z?xnbsX zv@&Jjx(%J`y%nF8xDq_|-nKr+n#!evqMTQCj~YsWDYc=OuzE-lsw@G>=B*F70^t-_ zAZ243sBqxR;==N+TSb+b3T40N-5i084c=(cLMe|Y9c;ir>qFPMNdRsA`5;G|| zq)^7R%2-xoELCb%Q&qR}7$OTS5J!jR7I|F!)-A8fnzW;Q5lE(1jwj?ZdE$hmERt%o zG{JM2%$^oERUfd3Y(Dmw!>tJ{W#VH<_s({Hdr^I4ozJx2ceOs)C ze(9(H;!v}WA=NDwGro+lWdsTdsfus>TxL!47u_a3*Ik)0I)BA|Ml;?{Wel!OF#JgufH#aNa0P3 z-ZVefYHEj^ad##7RL3*QW=Foei1PgBsB$r+gP-c3^zg1%Sz4JdZv1uWR~dE<>q>_U zCW!Lmc9(8Bs!ToR;<9l?!JFRIOX>!HdeLX9tm&)b@CG{ENkKeqZ8Js6Zd~Ob)o|cE zE8bP|9x_GFX#}K|7LsQ2(!|r2%_(9CF^$r1SCxGF$-$=w&uUaj!B*>g%lYcZTeeg+ ztjIZuJpr&XimS?~QUl!lST{t(uQS#GI7q0hkq}l4Mmj8u@N3ucz$K~ zhTQpdgVUI596rsUU6dNY$9EA;t>>I6OehZuOwuDojH8I(%5opnnqu-}Z_yVzIGcd!z z%y$}Is=CmBF;(scQG)oV97=J?iWuk&KXN?1hwPF;)pu5W$Z%thY6rW(`dHbPqb8^j zhkwdJ<_8*I)C3utewW!bsni4eMLN%7k}74_S=8k?Uk^HMR*~E?Tm|(&kzFpEb;cJ8 zx9LT$t-L9c>-p+`#ThVjC>wgSh>v*=0$)Lzpw(god!=~Mx2^S#59*Ned;58*r>!WT zBY6N#(Z@@+3mfC;^J?@`zPqw6tD`>ZJ`-`$FVCa|TM?v*r1;9<)lex89YW8hza9?0 z=noJGTZ=HYbGeUkMK!+z1M|2thf0T1Q0(r45^yW(Wz474L3HK`Seh z4Bh^y>#~{!MA2)V_>zh*t;OyHTz^L^#yc#+K?Ju0Mihv51?!NAIRc-xawugbasLJxmORe~iz+0gPBERLBM_Cd56!YE5ml|k+D#w<;gfW#erDpi{ z4e)j~U6VJ#OoE1RCC2b>U2pDLOL6}P`1>|k>b!8all(28c*v=?2$uKX;``Qe0UpO> z_2V2|(Zw`!qTBc1S=417COPNU?fVjT#LOyf&dhdP$)l2sJ)K{i@10+F&bsYBJL&bi zoh#PvT=g!_2khdMUA^qGFE4uMSFGP1z$2ZPtkXTe>UY`GFZ-PmiSn1&Cxj(2#18Qc zsz-_BDkku;r4P1YCOT(W9#(^=M>z`v;SY9{KpZH3LlMmxjE(DXjC5;-Ohz7Jq{7vV z+)0X5hXyC3aWG^OQ50ZacMR z%t9mHXhD1HWM}JOeNI;IXt`&^(;DRQJ}7F!)w_|udiS9v-jG;sts#qm`@ZK; zm>Y(xsijwc;X!Qp*&k#S-;6$Kz2PS3InzULx0Kjh2Fh;EMYY`l8lc2&Ho&XyDI0RX z)F}Reu^FHntF$`vZ5;b4({0yMz}+R!K=yf=vb383d^J+k__#md%J%sfjFPz7ZbFl` z4olkREVc)geD|di%GUx5go3FDLKOSiNes&VDiYd=v(Sjs&BGq5PT1|hvrv9G_1$j?-??Z#>4 z4l48G6+La^IWqhH+vxr$cfppR)#t9{cAn}BjlX5F~*L-OS-{SC;^CS&XRFG~zaBw!4 zr43goC#Gx>+5E#oN&!JVV{j=3OlIP&cgA0S_5*y?`N!|A^*x&xF zRu-2Xwq^DIhV+a+f*~D2rjH=Vw@4kq?DsjD?<@BYcwj*{H1s>-2wpZ4V6ve+CpK1<-{2e>I>z8xU{DiTp>jQr|P0cmA?VaRX%81YRA6{5{U z^&a_%;!uqtG|pW&hiK8NfZW%djsf9pl}VspM&$eIPtyv-xN$ta0m0r3P(A<`#-Bt4 ztMIkf`vL5Cq%lsMk{|(tPheIg@#Ta(75q?6Wx>bZMggY3K|S6GS%n-j1Tf#w+4=O7 zPxcS$x^I3~bfFWuouv2KK6~1`-2cTFtfPs9bCqnGUJj&<+&*(Uya83g<`Sf`cKe`w^fNjEeNeFFLQcrO zECMyxJGnT`4D41R?Nj24nZrZ;vaM=@wdt}XZy_3uW3ERom&$+65M0GCF%c76Ipm-# zoW|;pB@@pmZI*!rY0CfmLLpU24P>OfEi&DLN|Rla0eFXbnywqpr8RM)B=Z3#I_C49 zXLC#+fT3<4JUXlaA{Ly9a}f#F^(Ww5g)VY4k}7fW)lBi;_D9={RjA`lZxD@5Wc=}-VN5-qVrXNL_z3=_=RWMt9=lo2KW48GJROAIaHw`l zkGYFumf}|tVG-8)T9Su!VkP*f?h2x1ZD11l3c%{cc|6_!b zJI<3RPzX53ks}KC3LQ_uzOROX!UjBtO47Fj&Bv5knm1tJ`<4`Gc%JzSR|V}$#&25t zqzEDQ$hCFzl{98!_f1+%_q$gY19sA7XKlsHf=vYX*g%ZHFt<5&=aPiP@lh+L9iKVe z>p(hp$)E9tItq50Gmofu6Q|D5tG{3ibB1+!@NJRKkSqHLUQPN$gN!IHp}D5hn@4JE zdb8wi&WrA#dv-SHylkIdo%K2|yX$r7I+gK)t7*-`c4% zAd#D~EM7X1vcVMF;OZUHDuVljsQ?$utQ()quNGL>>cL?3S6^T&U*)C;t9QhsxO!Kz z-I}v58VJrerG046yah*6O?1XCzk1zG2pl+47Zij|jo@8_0b*Bq988+NLb{M}_)wY@ zoC62%Jsss2I_u)<2OB3uAIP)p43m(_gvYt`2XpKyi%c=qb<*sM+y^PQK8K%~!0yY3 zzo;U+{$L_`a~<8(2wT0gBdM}eXbySPsg937S-n#Rrb@M8j!)p})>aZ4kt_;f>;dHk z)5pn)bLHw!z9`scN&L^OVYa!%XlS5cg2^)Hi4(GzyBNGDX$k+j|6l&?|cUu4WTv%79Wnl?;Q zJ_jSh=MRQx*)l2xV&4b{J_a+=`9rc2%4U|4Or#0qNyD_pr9uNxO50<>(2v59D<<6NW=Bu}F+UGs+ur4p| zf79)}J@0~D{9G(&Qed6UZ^6Lz*!@o+s=WI@SYWP#6}kjDHAlsC{}ak_-*a4P;d2sv zZY<5gKU$>EFyTC1{pc@SY!1SW-Twh)ob&!WTd6n?TyeBpM_w@Vz@C_F&$lw*wl+O1 zFTka+9X^VUA%u^eN8vI?i^Ufkw!M6v*FGwnU){z@_$=B&1f@hd0&zU0<2}r4PJ91&jrXQk$d+UQOtn(G@FCMv zI{K{2&7Qa4w%K|6`qjmoHtbIZ1p{{ZYm6MR!33;vkVe6T z?{&bP6cwg6J}LGF6DRi|Ygd)O84xp%py|V%@}_*GVLfCJd0w!za3?9lj!EhTNa&l$ zUbqvnl+>)SOKb zOlNyb8=X%h@$_0*UV`X@EbLMo#vP^*Cdc4Y^b}%!t-KE&&*%0Z` zcrE-1Ogy*|;0OCc-heUPV&||LaggyJ)k*jv1f8X@4`oQJ(UmY@csCUiZ(#O1gG(O8 zB)-+pu~cU)VE971uriJmtN-pG$KEUDF8MdxU}rHg^*Sr(FmGhrQ7Q+QZnB&d&;&gJ-CC#`wv*w~C zN+WW4sx&eoGdLD{5~MzT%$NiOeXud$e2aOCDK*?It4nhWvvCRWCkKbsNE$@?<%`Oz zkbZ{>y9K2G2f8{B#ztz`Dw`+_yiog2qvvp=@9Au*2*(z#nDQv@lNP)leT4J zu-1@7G3HKHeH(N$?L0;|^k6~IN&rhxIKw?cZ^q=&*y=$sac6&=A&mo44|uaN{S1|w z7kYG~y!GRjR*v|F4I|f50aZ;LLS$a3JXCu@j*qnzv$fz;YBCpBvd2*zOz)xU!qM}P z{;65N=-)b)o$sEjy9!oNRG8iF&p!FArX$2r-n$rd2kk!l@_NgsuUSBbI{Eb4i~=*= z^Q$Bj^B^?#aRC_v)=!fuz3B}G-E)Os7%63T;B_Lu0w1 zfePmlIDhqSis%tS;XOU=UX>_`PF3iSC~ELgypFjqi$7(U#Gt`!<8w!|OQQ=kVhSX5 zw=lQO4`2fl1UPtay8X-R^H*2w>yO?b#?w{lr$BtORGbKF(;#eo29FQ->)JFwEB52a zGZtsVG&j=Q%abr_X`dkhSIw)_E(FLrmog;XB&0ywrj9#4`sAQS93N*7;#s$SK6v%j z!Pbp|+FqAntrV#2eDMz#{%j2Eb)q;xw_QbXT*rn?qx&RxbJ2?Mw?Cdb9;Z+9+7wjJ zO!}&=r@gbqssjps`tj#E!*e7sH3MIYOp9nG?QDoOP0shr*71q55wnrk#H>nclC%}p z!bSjcrjTf989=%83vD_wx%U#%NP!e+zULwd{d|nLi;#neSLj*k!umvMK*7coRXR`I zB_3@`;*(PkjghY!TdxUZxy%f6`-Xj8@2}5_;HV7ar#4Sx7tm52g>{EPW#J~hc`LyT|1^LHDh`g3%oh>A3ViXt-38tZ=@^Ydb~J( zoeU;3M$L?dK^@v44`C;<*iV|@#b0EB1XKD$H-Mye2q_dtw-%?FN%A1GKXu2YL-X@= zBW8kFZ~x}hMcDT9lBe4lbkDYJ#DkMt8cotrBqwVU6?>{$rkrGSaC_tnQN?9$dbo{J z5^YwKO(>3TjiiH#uJCm&6blAYk4PSe3q`_>{p5&8*unJ<*q!9ey9tD^A}ImFcwGtV zDZYnO4>=^Z?vBEQKB;SSU*!3zq>jskACMK;wtdtGB#SUeK+k|#i$?1xrZ^r8MTIZv z0#i5c)R-*v)YI89CEVipn6@SrImz~09@fBoj*3v93ou+g?%ORL2#NCsyK2pNSl61d zP#7LJq8SMf=_{wiw2D$Zf%{Ns2H(r553QHHS)7vJ>SPWoyAK`Gq0EBhadten~ zW?P;^UA=svlA)_Gbnyf-0J2;g+n;S7P3xUAwaW@XsMCRyxOTZ509?1+*+hu20XnNb zM5y6ZjL=qPTvmC0%|Tfv0)h?ZRvoq7tD6zLzWVPjQFm@KIRhJ48P-vxEqNNLn1BKq z>6DhH#+E()zct~b0)TzYr~QRGwc2<6`@&${_EnZck^>9FZbG*QZc!84?_@2eV z_lb#t*Wm>=AUYeL!&JJ&Cu#NLH&~Rd&PgI?vS&r;!-Dy6F{QLi?yTNYfk+z3S zAMl2AUUP_sre`30SJ^-FI3Ew|x)wIY7^Q8bFm$2@mF-j&Hwh)+wWCAx`j7Y-jQ#59 zc((m3CFSH$cT+|vg?e6R^U}s1c!j6J$#uxGsFh6|;!$HzF$9Vc&5#OIP{&_5ciV5^ zQW>AcuvVedN!($%*sjr9B(t#Ph}^Lq#c_@*;R|Z$dG}FEqzoBXt{<6dB9pXP-u}QV zQ~D~Dn3J`qkxvH$Sz{w?98JJb%>{TPJ)PFkBm^pFpkP7z8DV(|#0lP(nqp6POHxS$ z-L@jGhq{Sw0yt`Of569o?B|DxaDzGS@bzz z05*>7UiU99yX|wa2dx~5b0y? zCUSRjSB{=l+py1y`GN$@3Dg~@7YN9)IqNJg)Lb!`?E=1-t z*t1(<0tj8t?xdJ2h>sVA0hk8qPhze}6(Q}wE2%b7kMk~gr`YnVed82q;&_n%SSTun0b83UG`E)? zH2}=0D{ji|w+d@M7L}2SmZ?x!g1jp(PHi89hrVI=1#B@U;z~gxI*5jNpd9TnWosKx zqlxSTk2#BIZ4`G_7W21?GGwH_JVNhg|NQ*P#D79yl^ z+p^G^PPKfLj_MTx7ONAMgSU@wGGd_7M6=V;=R+9GGGQk~;!4FYlWD@2?>qIt(&#VH zo2eRg8eN>)4Dyq>Yf^@jWjXI1~9Lx zdgHc*Jwe4*&}$}8OHGh@b$i#w(iHKe|U11ob50Qu4&i7w2S981}7VXndjt-CN_&bgZrJ3LmVf5N!S71b2 z+jrB}Alpkzya~MsZV7fCWWvUuk;JN!ppX>j@jFx$s*iNS!-5E07r9qF%#Lv@R0PN# zwpS)sy*Ooae0wISo7QACldAc1CZUkz7*r=cH1e71LBpsdhr$K9adyim#JF3Uy-^CQ-BK25M5$`X8@bs-p^4pFNxMhghxdu5 zh*#(@xBwV9)MZvkAMr;vEpk!!X(6GV=kfcGW+S=;1v zUGTqw3(?5YturxtbyoA^+}+Kmb6ClGiwKAlS~%PA}hz ziVhI&Nx|c-JrVkLGO7cJ#`uNz~7KQV}WA;#jRz^;WmIh}Lds|s$GTTR;9}`Wk z>p5)mEx^3xeD&ikHMm}iNwCD$DHe5LN!nVkew^2f`7DPEh5RtTk>DeYtHP$ej|b?q z2iU%v5=;89$2v&nn$Xdu$RgBbE2{;BHaSaI!hBC-13IE9dMDct6Psp?;3PVZVjx0t zOM#>^F(?NgIM1ST%0B7(P#y#2Hw{Bf0s%X^$#=qCtlNaV6Rw*zi@|m(2FtP>^9*|x zOY%;1oGGrpDfAZEUYyB9)oSECjCQBfyEyL-*h#P7?Oe47?EHeAUn410A7AaBu@0D8{R`H) zc)j}lIrdcSo&T~sxaz%VcY3RTIcHDdD@u`qFR_(Fnuq8Rf}TD7vfudyb3J#@jVuS= zVkPX7S%UCNhWC6w+V=IUHhasiPu}+0_rK}BPP&P?&V*vVPD_-aW==u!a! z_TUDilb|GraiZ4-vx=jPor`n!6}xQr+u2T-k01O_wj>+AVy}BwaOV2#`uv2QuKu+T z2er$x9U31$na#!YQKhkV^s$vU)xds5GG%f>|Mq9BZ$vTB?e_on*HQoxc8VU@LNAZ1 z=*!W^JfE`|jeJbi=%j^YQUUK|HW`7PaxE3;Tw-cMp}Hi+rZ6TGauxK2FY&+{v27V3 zFDf~n7@djqci$lmEhRRd^as6i=n5=XWpHGYxq3KQ3!&X@3=*MPrs!x!+Xxh|fgCPW zA!?f;w$ZtxU)SbC92MN$<}lDGx>^0g8|m5azq8QZSS~Q`l*xSNxUy3~kT=J7s4>V~ z3ae~l87OBk(59~0{Lj)sPCeJgDpD1ue3Ei#$Jsp$v%tV<+$AI1z@!bhn+-QzIN@AK zYUDZgDO=D-j{J(QbiWk-M2!6)3D0d|-kVGA!^+eT#-VfRAMF3mRp&OLYt6_Xv>b_U zppMi8PZo+Qx=agsDo5jlfsUThg(K=4;x9EV>>KvneJw|h^n9*baK@c?|A_2^AWo?@ zr*v>P(7nV+43AXTY_#pVCVa9(c>o{lU5NB-j1vvX{$s*b@RsX6n<1*-KacO2FRSMw@4 zk532ft6!+Uu@+UZHXMX4FX{2&r}cEU`#G8PODqyBr(cW@s77qyfbtF^syAE2rKope zcNDgj^<0PL-U%IyjeB=Iup)K%>ms9MP!7YTN${!?UIf|0hr25~zi+SRW=>iC3< za`J@;Uw@>UBcR#OvkU-a_G4hnjiwAmPWvb zX9h?*5qu61P9PkoI_b4#snY=3QQx2N(qAru<^8wV75dVf!-A=He%VHU#y}kgMA#?w zA}kN@Zl7GTKKXT=M41KV$ks`4kM?W1)AQn?cXh(LE-cy(?psa0HObJNK)65-g6)f^ zdXb(d4-G=$;Z?#rz5}ULM~{)yjHjKK2kdE1+A#0tR;5!*cf|N0YY9z1D+Uz6$pi-o zT?NEsfZgqn1Pd&5JBix>M7>O{GsQ)n5LXieZA_(+Yb8@+KaEAE#NbSZtvg|(NO3W- zM3o^~eMrQ>a}`AKM|BwxLX=Q8S5jT0(TEj*bpzV|R@4#jd{#a$F;i@OP=alrl*UmB zX(KFnWn5vmqR%I~0>}F~=5?`SzL|EC(<-mYfhcD6lYDkb#cSezq7k z-bAb_Jf=*Iid{C3M==4|sXGUSisN<=Q1%ZarmMgD0$B@@n-J*_R9sEb7}^PwDjjDL zQmW_)pJf;fHEU{N=ksfYmD-BcF(r?$_>f_m&c?@OD3_|GD3u^&1fbqz^~OtNP7@Ak z0~Gy;(V3^>zr|jqk8y7Tua!drORW@DDFl6cVuQRq zp>E$QP(hbncKZpY;A-{j(z-WJ!*8AfXw-D&+66zvQhnbk`%EyoksAm7b!pzXWJj&h zgvG>E8XyX~0W~GHq(*jvocQ`9YxN(YuvNE8hjqKXit3~qaKYyG`auofL_Q4Iy}Ce1 zQ5K5A_Sk@fTZ!Gur#BMkGWACO>OUyq1XioWdj61?q8Tsa%6-G=(uT@CSJ7i9kW!?z z4&%r={iTq&3SwN{J9I-`Z(`C7A=yO}J#L^rf$1$(_jI^YAj^GZzEnX(IYDXt8VuVz z?2e1cWBTk1;luM~4jqGFY!g;cLmmHuXf)t3FtYSeQKww@*k7BI64zjDF#=DTD!!6l z+nWD8C;~%FsCNS=q$vw`47x^TeH$}x-^vQJ4_}G=<@@jKrMUkslNqM$3Ios->$VBA zui;W8^$TSPcAjG#ao8ydxr{5tQ7WYLnqX0gOrgPzhLtmf=iDrr)BW)^XYy^y80A$l_ zzSo(EVy4y=Yns54i2{YC^RRr+iu`~iCn9fX8X?Drs0fyA8=hf6kT%1oBa>j{fx;yG z4S~{^1T-U#2qe6TYn(D@6~vMYZOqmZNi@73jX~-rL`UN#@uE9VQbDk$`hlIdpJW(- zbYqcBnI{e&b93Z6KUDr;dSMLs)Xm6MiVavEtntWHh6JjTR*|fPj7NKLbXI?Cybd2E zS}}lPtKkVO*y11uDJbfzil2aWF-s!_+@BktN7gtXixVEZ!4S6}7Y21M4)&f)81WJz zPBcXfIml6Ec;Y+?mrj<2sUgdAgJ?>f1U08MRujW&dEpF3X!-(+Vs*X9;O=t`tgPUce z-f(Ba*;26ORC4$IcWgqLq)3$@g5l)_ad=z4|DM?%gMXL!ogS5!+Swe>ybv`m4h;UG zssy-H&O7}nV{B}hec(|u+g!mkAS^z6(o-bG7n-;Df}Qj*c?sc~|-OXjl8q>|}Y*+8BDqs65+1b_qwY{y~ z+;nVE%!f66udcf6b$c*4Z@+wvTiflk7v0xgScW$KeR|O!bY5PczZeXzkWGO-eQ`SY z1#4dq2HpP4_Stjz#DJY&zrq5*=iPp{*LgW8u7`=@zFiex(S{|Y0q~U=B*l0s%(r=j z)Ieaj2F&@a@+@!DshX$?jiIj_)jrv{C&ls zAoZZiF}e>9*FY$t8BIjI$ga0H(iwJGaX2qX)iB4>562Z$+U&}>86gNapD)>b0nYSD zgGlGff?3{wt4 z6sUff?A?Gt)^LYC5N@mgIiaN_n9!-`BP7Zlwb123_eBr@M!iUW%^mD;6d)x+q?q4U zKY))j5fZbh1W&PLoDrk%?7AWTSK9Cm!4T0R?&xD9yJ{dvUrCl-A{86N={}6~$he+K zX$A$b9LaKgC8{<*2Yxybcd!Lm#!tCi0>Us+Ui(uo^>q*xbOu%0xF1(;F&N4Dz+Z zxu}T!M5Jyp;VuH^uijB9F)UUTR~U0*;1fjIDp!BZN3ii>Hi#Cb>M z;T>?V(?+V1xbp(l98vXZ*2I{WEL|gG6E#3UP*b(?tjH>#bRK+>A&@BpOfnDFXnO;u zZZpMonb004jhWWfTLy$#v(nS+VCM5Vb|cV*r!H9*EjGoVNPtlkK6?LsumpPrLneL% z>Z`@T*!X^e@7{>zOeW@|2PTbYjeYpGk_2yFjA}q>H_LdO#4rIA+X2(WZ%VeT-l^y% z$_|~Dh(z>_0)y$)gj?O`)lizlfG}xxUSFvOKmN_Sr*!ih)gW@9;ssw0UiGMl)mIoZ z!N9?rcIQiYDH9^;Z7poD6AmE7eA7Pfwb}FT+sljl-}k#G=e;g_Gq_y+%h`GN#f2gP zx_-@`w@=z{I&bkMWG-=6d@-h9=0 zxs51-iS_y5K{#m-bg=BVy+P+?H`B#zut&r+nP^!OUfb4@Fmd)jObdlk&mNINU0rqA z`Nerm3e_ucLS3JCF;?2|b-H~pm7jJx{jHEenTU;kaN1|x0c&??V>KZb2E+8^SHElb zh!g8tO}~rNhxubb5gvUB^M|uRv)aZ27?8>zhIpQ}lP)6dUv@hf48FX$PJo`P-(9VK zSMmrNBRx%6sSS{xIV&|H*k_FT1TSdjEj+5D3mzc1Tm$oIz#?rjsV>%woPxLJ5YSF} z>6hfCO(F!a1mWtZ$_(WibJ<+B98^}*9h4i7wP7C=5JUlTjE+Y`(i^`(S~)Eaz((hPXDI*>!F=Xndja!lm%BA$zaZA6$D{n`6=Ia_opj zuXKn&N$J$=RIr0DSkmWC)5K&N$LfY^L><1X)@cpvB`?8I;t^I}1d%GuuvUM98;%oHJTV8Su!!@8Hx|NiTLV^3i`ALGkuvIkx+n`$+wAOm6O zMi_tT`qCPQaRE&Cw7++Tyjmbw#!fj5Q8!!@dA0_Tq5E&&e@C8rggvhWrLGpt<+qNQ z1?m3~ng$pFHQ=tn%pjwSn@%hiHCxJJo4}WAs8F(xsQP?FMF!leGSI>|}-H*ZXD}Mt7 zsof}OBKxhwe#%9PPe8jst{^Dh6dOnT8T7W4`@6NLYuc@FYhP}4YhOOEGoX-qq=mO9 z085r7Otn(Yv7TU7Ym{&RT^`-hxgGGYlj1+c_dt^Bb6N(EgK~@*IKGmvkL9CDj0a2p z)x77Dkj7&!TITdP^$!Llz5*q49HVCV`9l~jZAscmR{T3z2I-1L`8yswd`I$nBAv8i zOJ8L)59fg!W{zWY<>A$IHo!9lV{z43bYhd?-0-I|=G&3_u4=%??lqddROL>~v` zI>hu>^_wT7{fKMU4~!^YFIV&ciXd0mj%|};ryfuU|tZUfqICeM#x z0niG_7B)y(obg34k*Cd+hgzL|{nVh3+vIljm~=tL8K>7gTER(WM(}eV3w&_PI$7{i z>GDNPq$z44q%LM(R}jVdEdumbt)P!RdwL{9#lKh|6+hb=6+e3(ocuPByGlp%L@!fC z!3cgAY*{MFKW?RmjOo5PFs616)Ca6(%r?_wJX%hRI1XRDIC^|8FSm2+HxS(;nheat zbwRJl?Hr3}$mNU1t^+1w;ZTIG830Z`yTa-o=R$ zTC=U-wH~1_U@lmJnh6*s6Ipk zs;mKzT155Y^dvw4KIL-%D7D@on6YO?O}2P z(b!J*(~qD5#)!E*;6MFOA0i5~r{ol&sY|qt8%`C~v#7W%@tOF@$8*emSOG6kw#YIq z{|3rFVR8DQvcgL>rRlgC5Ys?sXgSJimcjmxh9^@`sO065^&!3g)nf z@1`uwgT?0$H&dlHGLKP4Ua6T*8tJ6$rbbaF(>M7^*+jwB+xgl)lb!pA&(7-7t&IQ> zH@lT-DIIdlG%Nl|;4-a_n|90N%3s*nGx>{Q@sCUp_jS+rvZdU1B+{z|c13iJ5&))7 zsjapV$L3G7+Q-IpJdNxs4kX2iqPAg@N^buaf{00g^JmqP_#6xeN+n8;gO;)w-^AZ zU!M35U;8MR*L12wdV;frlOV5*K$7B~?r=INs}G{wQalJ_A}atHU{%{}u89W)a}Pm? zD;UBAA#6JR!lte$=Mos^5(wpy?T_-<-yb+E*R53I%Uo zR!N<@nmDyCS8iuSTVb5)3ZoTnbx6ATFx~?JO(*t1on$r8>-jue&SXTnVxVnZEXyYv zBY&F+dFA_LF02Cst@NQaH4HVC_dDurRECSIRK%e?C}wjKt_uy(a67&2Zf~jMiW8iQr8Jj3U4@qH8U*Y{rg&Z9hZMZ^a5f2y3#;;oO&`f$+uAYXP>C$*`fn58YMo9s+aN^Yf??yXMh!$?;uPQEDa#z_fVqAVmO z7>mY0S)@);3;1*tEx0PNm#ov)LbUO^{mD2Fn~W$oy`&&D<-j2WMmHgrq(vmd&_S>W zq@y=e{3=+jSSI-F5%Lco9^X-Rp}bl_RnR=LL|QACCtZ>`lK3`JNO{8%yx}LUp3n*x z{3$SaD5ts}eul45V=oDS4*!%sME;{;gP`DhggN3-O1o`_+I}N;2Q)3tREidD9B@d`@ za9R9>mV-dnIATMns;b!*Lu}f?{A@G#a$AQ$y_lGRJGh{|Ya95WigpY}^m(JTgiR#D zwN83TNn!J0w`=tMBM}ea#U8!vk|_w~s!*OLn;K7#hI{MDVh#=s>Gjge9~co5&6a}X z5*;WjX>_|={lg^>p2C%|+YSXgB}ed!8%0~MBFQsDsLfTiJ1Xw3$5<1Y7ey6`bOQq5 zhqU{E?Dkh|&QaDxWwYd6h)^?W_S59As*wxBcgY&}xJ?Ju|GDts8r^t@U&X3?f?-5r z9me5Q?dX!W--)hnR*vDTPH9tA^C-ro2~1LZ zNQ|M4^edQDsh{~?lmpJSUdDNbDogQ0Ftw?v+56&NcR!&PB8@5T_wIKb45Qva;?vb6 zT_C^h{>;NQ_n_)iU@}d|*S^>-JbfO51-EN$tS``_31UBa^;w+vCm1}~BA=A1vJN~+ zTdw)gG)eZ{NGtc)%s=C!f$C6-e*3$tw?xZwd2#;mhvOGlXUD;l;3`Zo+d5D*(@U+G z#N=152zwC5!z4!X=2C?A08??XWNn{#!Ko_3~=>v<-6VAycQUNrDWBkMWSc!?zV=Cmc5*c)QuA#8lwU4LsN zSig=zy_qXIeWIzJ3+l$q<$g+VPoVT3Cz0G)@&~z35ML|0eG!rFjeO&`KRbC2Vr%_L zqV`GD>wXi_5v%(@Wt*1=HE2!;EB5bm5XOu&bBymp*L9INDvpZEhnP1u7?iFmEb zIMr)(t@?O#rj$|YuSe3N2SCc<(n=mk(m<+?Qgt9s?~%`ATSI28+(=+B|WtgGFXlao89iDU{E>0I!{V4Y^-^M`P? zj2KN=b1Z)rVZUNXt!yo+Z&)E?##aQBIGs@d)6&DR&j0d^M2)X5+Lg}Pb@feSG520B zaDrp3!4Al$7Gv?ny+LCL1IJJSDAn7Sz+>othw`KyQYpB4kQ7)#!!hLN&|}(!4x-%- z_@RiXP0isd2U>4wDVa0W}<#xIPRun1I2hhpT^QPl#nL1gF==u?_p#g2(_*qATC_U z(|pJmJQ=2=;%*ELri&@d2nW5XDZd|#+C$LT3tuQQnoiMxzq|q%@~QGza89ahlr|yI zP0M{i3_WzpdO!>-R2{j{`cT0ptWAGJ-oXv4YNKyW7ePewl2`3J*WVl;u&8w%&Bo~5 zYs~~kO2$@WvL<`nN^U9^INJ!T3+cUX?F{p>#d#~3f}xDuWYjKb2M?K%YY6v>-Abo@ z%{8%Nt^Z)jtlA`)jQXuQS> z6j$@E^2bpAhszL{{{ovRus<<~Dj8rxT5`05Xx$$I(#Jrv)8P4qatX1S5!aMK(t*+( zX+9)|3^gx075vsCS%!~BuOsrHgaj>?sLADq>4f^1+gg>q0S7+naO)a!_Mvuu`OWzm zc)WwJkFJh_v!gfPoWDJi>Bos(d}C!%a=jAlNxjbI-5OW978Tp4bD6S;Q3d6Mc-Fxh55z%^@{jrTm?=tE s(p@IqMHok20RB5Pm;e9( From a5f6edfa6c2edf9ecff17e5781315fff20d024d0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 09:30:03 +0200 Subject: [PATCH 130/162] GetCSV refactoring - changed to mirror the original model class --- .../common/collection/models/CSVProject.java | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 19606a09e7..0d939ca0ec 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -13,23 +13,12 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "id") private String id; - @CsvBindByName(column = "status") - private String status; - @CsvBindByName(column = "programme") private String programme; @CsvBindByName(column = "topics") private String topics; - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "call") - private String call; - - @CsvBindByName(column = "subjects") - private String subjects; public String getId() { return id; @@ -39,13 +28,7 @@ public class CSVProject implements Serializable { this.id = id; } - public String getStatus() { - return status; - } - public void setStatus(String status) { - this.status = status; - } public String getProgramme() { return programme; @@ -63,28 +46,6 @@ public class CSVProject implements Serializable { this.topics = topics; } - public String getTitle() { - return title; - } - public void setTitle(String title) { - this.title = title; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } } From f3d575f749043ecf12f8b44ea18722dd684d39ef Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:03:57 +0200 Subject: [PATCH 131/162] GetCSV refactoring - changed due to changes in input resource --- .../dhp/common/collection/GetCSVTest.java | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index bf5e3dedb2..292fa46665 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -26,15 +26,6 @@ public class GetCSVTest { private static LocalFileSystem fs; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs = FileSystem.getLocal(new Configuration()); - } - @Disabled @Test void getProgrammeFileTest() throws Exception { @@ -42,11 +33,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -57,39 +48,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -100,6 +91,15 @@ public class GetCSVTest { Assertions.assertEquals(767, count); } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + @Disabled @Test void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { @@ -217,29 +217,29 @@ public class GetCSVTest { while ((line = in.readLine()) != null) { DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); if (count == 0) { - Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); - Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); - Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); } - if (count == 7902) { - - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); - Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } - if (count == 16703) { + if (count == 16707) { - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2788-6298",doaj.getEissn()); Assertions - .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } count += 1; } - Assertions.assertEquals(16709, count); + Assertions.assertEquals(16713, count); } } From 58f241f4a2abc0aba95b56248d2bd59b2507b324 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:04:44 +0200 Subject: [PATCH 132/162] GetCSV refactoring - changed due to change of input resource --- .../test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index 292fa46665..d24083e0d7 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; +import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; From 5cd57145303bbf65ecdfbde572f874ffe28a54a8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:06:49 +0200 Subject: [PATCH 133/162] GetCSV refactoring - added ignore annotation for fields not in input csv --- .../dhp/actionmanager/project/utils/model/CSVProgramme.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index ea89a75eb4..df06fd6b4c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; /** * The model for the programme csv file @@ -22,10 +23,10 @@ public class CSVProgramme implements Serializable { @CsvBindByName(column = "language") private String language; - @CsvBindByName(column = "classification") + @CsvIgnore private String classification; - @CsvBindByName(column = "classification_short") + @CsvIgnore private String classification_short; public String getClassification_short() { From 5f674efb0c36201c797ef282b1b2b66724ebb3c3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:07:53 +0200 Subject: [PATCH 134/162] moved dependency version in external pom --- dhp-workflows/dhp-enrichment/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 6f638b1885..644ac21409 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -47,7 +47,6 @@ io.github.classgraph classgraph - 4.8.71 From eaf077fc34bf43f8b2b67ef5a9a701fd78a1eadb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:08:58 +0200 Subject: [PATCH 135/162] GetCSV refactoring - removed not needed dependency --- dhp-workflows/dhp-graph-mapper/pom.xml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 6c5e4609a3..17146903a6 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,10 +122,7 @@ org.json4s json4s-jackson_2.11 - - com.opencsv - opencsv - + From 964a46ca210afa179b2a1735f603653f99a463dd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:11:18 +0200 Subject: [PATCH 136/162] GetCSV refactoring - modified due to movement of classes --- .../dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 71b20b356d..26b1d59931 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,7 +100,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -112,7 +112,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 01db1f8bc403f1456d7004489af2291282721278 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:14:17 +0200 Subject: [PATCH 137/162] GetCSV refactoring - removed not needed import --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 2ed76a72ac..5b00e9b6f1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} From dfd1e53c6982326338abca5b5a59f0d1e2b58d61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:12 +0200 Subject: [PATCH 138/162] added external dependency for version --- pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d31ffb88d5..a5e9f05477 100644 --- a/pom.xml +++ b/pom.xml @@ -524,7 +524,11 @@ opencsv 5.5 - + + io.github.classgraph + classgraph + 4.8.71 + From 32fd75691f17af421c8f5e7c4a8b4a0796c4cb9c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:42 +0200 Subject: [PATCH 139/162] refactoring --- .../project/utils/CSVParser.java | 47 ---- .../project/utils/CSVProject.java | 200 ------------------ .../actionmanager/project/CSVParserTest.java | 31 --- .../project/preparedProgramme_whole.json.gz | Bin 15986 -> 0 bytes .../dhp/oa/graph/hostedbymap/GetCSV.java | 74 ++----- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 112 ---------- 6 files changed, 21 insertions(+), 443 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz delete mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java deleted file mode 100644 index c53cd2127d..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVRecord; -import org.apache.commons.lang.reflect.FieldUtils; - -/** - * Reads a generic csv and maps it into classes that mirror its schema - */ -public class CSVParser { - - public List parse(String csvFile, String classForName) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - return parse(csvFile, classForName, ';'); - } - - public List parse(String csvFile, String classForName, char delimiter) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - final CSVFormat format = CSVFormat.EXCEL - .withHeader() - .withDelimiter(delimiter) - .withQuote('"') - .withTrim(); - List ret = new ArrayList<>(); - final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); - final Set headers = parser.getHeaderMap().keySet(); - Class clazz = Class.forName(classForName); - for (CSVRecord csvRecord : parser.getRecords()) { - - @SuppressWarnings("unchecked") - final R cc = (R) clazz.newInstance(); - for (String header : headers) { - FieldUtils.writeField(cc, header, csvRecord.get(header), true); - - } - ret.add(cc); - } - - return ret; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java deleted file mode 100644 index 268d5f28cc..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java +++ /dev/null @@ -1,200 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.Serializable; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - private String rcn; - private String id; - private String acronym; - private String status; - private String programme; - private String topics; - private String frameworkProgramme; - private String title; - private String startDate; - private String endDate; - private String projectUrl; - private String objective; - private String totalCost; - private String ecMaxContribution; - private String call; - private String fundingScheme; - private String coordinator; - private String coordinatorCountry; - private String participants; - private String participantCountries; - private String subjects; - - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - - public String getFrameworkProgramme() { - return frameworkProgramme; - } - - public void setFrameworkProgramme(String frameworkProgramme) { - this.frameworkProgramme = frameworkProgramme; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartDate() { - return startDate; - } - - public void setStartDate(String startDate) { - this.startDate = startDate; - } - - public String getEndDate() { - return endDate; - } - - public void setEndDate(String endDate) { - this.endDate = endDate; - } - - public String getProjectUrl() { - return projectUrl; - } - - public void setProjectUrl(String projectUrl) { - this.projectUrl = projectUrl; - } - - public String getObjective() { - return objective; - } - - public void setObjective(String objective) { - this.objective = objective; - } - - public String getTotalCost() { - return totalCost; - } - - public void setTotalCost(String totalCost) { - this.totalCost = totalCost; - } - - public String getEcMaxContribution() { - return ecMaxContribution; - } - - public void setEcMaxContribution(String ecMaxContribution) { - this.ecMaxContribution = ecMaxContribution; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getFundingScheme() { - return fundingScheme; - } - - public void setFundingScheme(String fundingScheme) { - this.fundingScheme = fundingScheme; - } - - public String getCoordinator() { - return coordinator; - } - - public void setCoordinator(String coordinator) { - this.coordinator = coordinator; - } - - public String getCoordinatorCountry() { - return coordinatorCountry; - } - - public void setCoordinatorCountry(String coordinatorCountry) { - this.coordinatorCountry = coordinatorCountry; - } - - public String getParticipants() { - return participants; - } - - public void setParticipants(String participants) { - this.participants = participants; - } - - public String getParticipantCountries() { - return participantCountries; - } - - public void setParticipantCountries(String participantCountries) { - this.participantCountries = participantCountries; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java deleted file mode 100644 index dd7e1910f6..0000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ /dev/null @@ -1,31 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -class CSVParserTest { - - @Test - void readProgrammeTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme"); - - Assertions.assertEquals(24, pl.size()); - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz deleted file mode 100644 index 620e1abfbf28cf33bc6307fc9ada17ce4a048a71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15986 zcmV-&K8?X2iwFpXn5JL=18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUF6l-E!MV zlJ5OJ1!69?JrShGq9n`hJ2p*?*<;HJQQnz}*oX}fERq<302@D2Yj!tcVqf80o%25X zBJ(6Cvwnc8ViiSz_#xfC=xB>1P+3`3S^4|rfBr7^!r#6B-P^a97iVvN*ZZ9p%?bSd zaqwpFra%7r`uz39>r40#zDoTx!0#saADp2dFJgC1;NxT!#pyl&4*m|@aGAMF{ELM6 z#=uPyf8l#>>PI-%H1d3sx`E@Z+#n#~k|fT5In%Wpr#e7TFbw=|^l3c2nf~do1NgT_i8|7MP{QQ#M)G)jz4O2S)9WVCRr04iB;1Y&TV|M|BJ^10$aba>PiQ_2tlQsM#^nz@TUu5BPR@=Rb43 zhXDT&zfT>1PTYikIU|8j7DIMQbm#8H<5 zv~b2*9Bl~w*iRG3^HcxtB*gc1z=={@U~9e|kgV*m^S5V~BY39nvCl3e#*aOq4UWwt zS@~W-*t;J|l8^wFA|P%^V*1%H8O#y>{RES8{PiY|9*IN3M?a3jH3^Y78xk*~c=FMFj$s)23H$2NpT%r1wSNBZND^g0kwb7zd>|dyhku=}+!W|eHn3+C z_)D20JbX-it1PX)5e_Ij|LpwAu=CGg=by{_{*L$|jO5`InA~wgoGo!?jK**`*{HLw>GtnxU9Wn0 zt1##C{NmhlBfM2_ggbWR@W8_fg?}Rfz;w8D;ErBo1XciOaFXXFC2Noj5nMq|mJnPg zNq)GSO~A=ddd}0z4+vgzG2C19Dq9ghb^?C^7v`GaTLSWk5(9n-bMagpbG-q90rDb+ z52El9=FPSMXcDV13ZkXIy-3Yp9M_|d1LsvS(`fcL)NDZH{hltz$?>o91@et?LOn3& zCTTLD3e=T)nS9QI)bG!IIN3mH=*G`Ta~!$e3RnhW*GX_|iHY`{B-`N3V4|~UXO7fH z8@vG%ocIa^Uy}MBe3AIekV-&z{)l(t+<~hxTex_@>C_{T;ckFeBHXr%ehj*x3&Mdt z;%-uR2E0-RcUJBrKAJ?E6&;3dZz^Hoz5Mo$77IcLK*j~+S6HmjV;u9Temiq8e*2dQ^c`cL2y49Pwwpe8Jz@uNpS!Do<= zkfcvhjEY8;V?GJz8OTE)))~|g)T&qh1~%rW;t$|A34u#O)%J@=+#}(N+A4HL{JQKp%-E;jr*H?s zzg3$arU@6SSTM{8WF>xmmkErwWU- z)ju#PLMdBhFxmz-04PI142<~XQ{wT2)xGU{%oV_Tkg>&B~&>|s)?U6V<-!8d1jLTid?z>ICjQg z@4noQhd0jTW_mxl|9U@mzFfl}H(&m3ct81a;|y;;IQQex$D1#ozWnj8&MVw3`jX`C z!$}B=VVZ$m4Vz^WhS6i85a%lJbN-SS7v~TPZT_-Kv?_jpi#PAA(M)BRPlg|EoohEq zox3QFm{+{xsHT~sdZL@AcPo2xaB05p&gg&g&IE;rs-R#Rf$BqK?TTAH_|KRu@d&Wf zu_kjkpka++9YVD@YFsWS3a=x-L7(Nj(lU+82dWu@ zR1^5pqdxe{` z6WA!YE+IW6e~#i{4(9L#4@3fk_VPOdh{PRrM-~BdZ97xRM3g?3&1x?TfFUsS3KXc+ zxA$?Y(Uq{LHQd?VMpI@+#kz?SKg?-1)6!mx+!_DV2|8s zikwnv&^X{fm(Br3vwUP(2ok?XiJy}V3Fky4PDqmlg;%*zA1=Q&n(cRBqlC zr=~hC-kOg0fNrpJgI|ruBXg%8h>bNmwAnI*ZW!fL+`7}I`#sa{F1(S4XCGCYYK!~D zK=WR_RRz1nwXr2!?qi?^qa5+l4gExfsW-6;LJc&s9_+06(HAijn%06u(flemCQe9D zZ^O$51K^n-piE5s0NoE!?Oe#-Y?y>R+&_ZW^M8I&5*0?jLdkmxfOqzNr-s1#&@*usfi4b(;X4HF?1W_He3UL&zx72+q2)l2Ma4n`-MajJ`*}{8UqO(IBlmZ ze~}LETt9Gr{z_GlgJ)1Zx?MLdT9n*oY-;z}=R7bgC55r0fW+phY1^^#y`MXZi22^s zb{{w2lT_8jG}bGe(<;e-389 zp)ud%JW1-#eUdmLd9YAWQ-=8T$fVJe8_!`1&XNiNAs34&fjzK_qingt@A`^wrvgt( z9s7LdG@C~*d>Xbwt{Q_rv&W#{DVdC;FHB&evzj>05bUCEW6&0gYKcJ`->o`{-um$?XM!5yc=qP=ysWY`Cv~$uQ~lS?Ki4c1AZP zI+|$SEIUI<;e@2uf}f;dAG45aK;x}$uGDNYI5%FU_2acof1ccbq$v~EU+x^H5C0YK zt)ri@ZGUhIX(0#KlDHe_`2^;rB`2Gs=vS~m)zDE)Blygl@k5fkMm%aqd9j~xAu{(9 zHv#&NxRSf#Yz|H3tu(AwS88%LoYAXVy{yw12!TZp#g(_p))=*MH-5@eL@jlfm6ok! z9q4t0a}$(FSCo>&Y9mnWXFukXib-MNA2#tui%FX!T=PnpLz3tE9vRd8aKkFVkgmrM z(e(3pYe$rMu(Ap-Sv8Z8zKsUqh=&i0q7zrB-pbgfyJc`@p)l2l!6Sjq)`NB8F8#uP|9axW8a+j^_maBg+{pPSc?*Ytj93 z+sc$Q5n;OY2Yl~s+kAY~2a@*ZG3p`Gcv}JFy&h>?EbK={Ljqx+!PPQfV}$>l7Ww|D z zwwmE+WMEuI>;9Q$<+8#o_tEtOcjo8XdX|u#+6#QQal`*01^z`qs5hsQ;>BpAyqNnj zXvB`N3-Z#jS}C-rxVK4%24rP3VPr{GrwOqo^R$;AiKTO>$)edF#z3c5**CfQ@O65B zHyM6%K8=ST#&^??lUtZXd1t52*#QP2&aI!YdjaAbqP(Y=TKlUI37Xa^majSnde9Wc$=8&*RAiYT{o?X z`Z_dK`=r$!J=mcFnal%fndvd7+&pCWy|foN4o*lju~IPbkV=Y{)7ZwZk-$UFM{Q)N z#>1sKf_J7vc@DwMT>D-e&2YbQS6W2Pz-vMpcM$qI4LeZr z&NgdLf?A=nA3s|8D8?jJzo)9JNbn|(7C0JxkY-N8IRQx*JLxm(u?o}{{~k#p^E`*M z%AP$IB&P2>pr##g>{v_R7rK^(R;~RrTfWNqmYiLK<;==MBwsa0P>~%F?o*LSBNrrt zoSP0B?1u_nJ-B?E%G)ESN=p-2{=y;3mJOu}D~vp`&8b&#>$nRD@?Q-+%BI zzNJQQeN9O-NtVay3`|6AB?|jI&sLN&6`pY9oy=TzCQfo$YRcQ0{>B)m8hmffW#DIU zdQ~@4o6X7lu>0rGvUr6_dtb`1X6wm)pXigiIDSp z&wqSpE^&CPd;f|UHiPPZ^jS?LBJgozhZ#ZwU_dfcnR$ah8s81;TgJNUyYgSji5+#$ zyXn%Yh9{uqmYO~DmpTyoXW}LzcI=|Ul%R}RqMl+)&^)@EvVru{YHaB;!?X&;+LkfP z8r(8U$q36K%CEA#gvl$YoBK$M{=`{J*gtWte(@M=}1FT7Tbi@!1FSleqk9tM_*~x~Wgcv_`o07m6$|J09zi|4MvT|3 z;?SEc=CJ>SOA)jtpPe;~Koh$>dK=DI?F;q$(hoQw6s_MkAntvhj)u${M@-tka zVAenU@Bc*e=qY({`=bm$8RqfY`lv4KL4z$KoN+Gb~Mtlwt zhtB5!3QFq+j#4D~6bme7_9C-|HLd5Jor6s^&b5wlsp^=7>gcuLR-NgcAHOQS2Uvat zjI;*!;sCiEX^P$f0nRZgu4Gz-|lC*4X zbAWTJpX&(3zYlvXVSu40Q|D=?yYXCY+tBK5Y4l(*$aZA`@&GN(gYwf{95ASEQ@YW9 zk8}*-zLT~N5Y_h~Ohd)CwL4I>9aFB0zF?N$9Br~uCkaA2_ifo=8mQ+zYq`yBd|Xe> zGRYy8WgDf4Xi5abPM}5*O>&H9pfzt`iNgHCrb*Msn>1#Gt8Sk=ouG2JJx$GNWJX?_ znG(g9*uo)ph=ATw0lR0;b!?q_WtX=RU3@QxnZEe) z7fm|qD!bG4kf%+JH#eKcB@D^0sUsQ-!YS4wTlnXk6fGj~Z;LbY=8!ja9?z_cCEbQC z4_E_rCmEX)vfACZN|rj70r&Hlu|$a|^gUkBw~A8w*L&wr#6nH>C!)BW&#wmcdI14EhVq#1JH%dVBi%5%LxnR zPV`mjujWgrkp%T7ut<)}EieoUB3X3eC34N8ykS(&MCfuff&wyt9^4*o-@F|)^K*DM zYkBAB)we(`WtgpVrlxAk-8@x~v_)GO=-IfuK>_Z^O&rZL_IfP`!($tkszn|rj~zT< zz>2lqT0sLO>3K{$JaQnT@Gj&H#R4s=1c#{kNQ%%63Y#eOmEL@d1jQURd8#Jdag_Spaxsu1l< zQNDIivluK0E9bg4_!C2Z!zRf|B042+CA;QL)wxkh)77cEs#mpUNsT(+URAVtyJ!9h z!yeoezB)Ia!9T$ojzRlKX(m8UzpO@PR9ZE|kZR?-`%f+1#6Gncnjl|=pno&4cqK38 zyjFp^-oDFG9{O<1czyDA{C)LS8h*HiMJ8SQ%T+3_2Y2pnQog5En?dL1{*QI(irAmZntTZR4yfg&m_1G!c*mWN=#)xc z@O0&-XtZsJM|1sjhmf*(2feA`Lv#>rl@HeeqsmM9W?j8L+lnGFMNBj|Y+qna z$J5~*Yk6)VsCrBF7hbI%086>u#g5WkTn0VW2Hkc!4GdbsWefJ?K2yb*_2c0;b+U5f zJQMyrFe6|G+BXk@Smb0Qe~NKU5|YA5N9S3~U>U2~G!^4~P& zOUXgPBXaFxI1>NjA&6_ESSs<{5mpqUu-JbB$5wn;hI*~lv`Hb~a-28S%Gsa+F9_b*$FFtrt27&InMe&=!HjUCI>0UX zCPU9-Z33LwPad2FLdb2W30Z$t*>teJv7|0BPVZ16YUeDDe-&+<66@4Q?VK>5| zT7Krdx}9F{)%SRMdj~5px%s0b;-sT7trPz9%MDE~wbN!3 zM;nmta|ZB`qp4Dfh)6s%Er=H5`WvQ9|CenDJ}<3W)~vB=m^ylZ`Z&V)wZ+@DO?GAG z$(7m&dK|83wgm|HIRd97MG=GE{*4yyiw1Hs^|fW>MR!)k2R}E@%UX9#}5%gKEll zT-ArZ_jFx%ht2y0D+~1`2pc+$qDNK=t?T=(>PwC>7ozn%1wE=~Zr^gDqvnhm7Gu>| zS0&fmaRlQ;4$wi9?<$;E1M%uHKeq)0&DFrFbP2c6=b30p^@#N7%fRv64U=v89tOb< zGRa*ESciW58RVbP{D6(tlMNC6EpNPCH0x_&?$rW?$2AMpS7kvtd#fkYP19`tTnIYs zT#&k09k>QF3NJ!*CZ<#!5UApL98|Par=Aq%DQ8Cr=?pl|N8w39uTe}kctXOId(gq& z%wIztPDN3R7X<;U#7*Fqh9d=><3%no-3+}nX(S4~SqpCZZ2=9!;<0QzbjHI)zoNf| zC*Wg21JBmjz-g`T7}SeFb7#U{=@+aM%SIWHFQ!;JJeA*CEu390h++Zw#T#$!LU@~+-f4KKbG*SOIMH#-w7pob zm95&UjTIl*-pw+)aY&XSuixf;t0p zaABag^N{^uGd}_z`{6gy1P`Gw82-g14}3X4N0{&_oqj2J^XjLcCjFo9C%@OcgF07Q zbW%1n2QBx;(f)@A&6aneQd%v!D zRRftkb)ptaV@N2)+}(NQoFvZ7z{K_(4G7h-3GuNZqZEf3+YRs15+Re>&Sv zXyQjX z6zV&(%n#@#4S%5`9|Zmq=g7kcOx)=k>g6{#9GR(gYSqFjd+37nKq4s%bwasQ>bFNpZvc!@i`ZZ-8*jaid4$nKYRBp<5GzH8h zXWCkoaw)phQba=mT3(7M-?CcWT%~*^YZcy(1R(n}nyGExtXhxgX$p$t63$9kSg4|N z<}8Y_>0tp{e!O;b*=ZbV1eY66T^I{O!lhboCP4#XK21zkyhcGfh-B_8Sw^3dV&gr%9nykF?j8(!k=SGd6g=vhvs+An^!TX|=CXHQWsoaBr zCG&=990d*FY1El~Uuh!E=HFK!Nxf){m2{V{i76=(l^K^-2m2CeJ$@=_J26SJAf<^X zn`{>NUhv#sVA{$&cZFA4-rX&L8z8Y-uin}T(ehZ$ro_2=3|~%^)R$ndC%Vm0t*p82 z@oMdEwDqD=W9QY+1_rWFbwL@B4mv0ODRl^k86M);*|HN}hFZ4C=d4ULk98Na zqB^fyOxtL6W*X+u_5sfgedSIcp&IAUG8j3H=;Fx9rii7W6k*~I)J9*x7e=@^6HhKs zS+B9uy%5}dVQv%iMuiR!I=Gc%B zG!*xt$LmCK_UPW7lMM;yd73s%Bterf@UhiPH!Gsh$h}U%qOWepmEdFP8C6@ZMv2m* zil##}(5n$JHZ%SuLxXA|@RVqJHOtT9-!RN7dZh3Eb@JZ%;cJL?GyfVhvEmG=XQy7d z7>cOp4_oS`I_@E1Ni)b6k3F?Gzgn=tP^;Bgqsm8PwJut%N|x{S=p&qdZkR>0zp;o* zQYT8rK9=YXGOF31))%T)rv=^OQjZee(%nFuhcJ2y$efE3){=^XCoL8{Ry8Lct2&T+ zQ87OQ%ywD3JN*Rvn6d$DW@k+{^-fMSK)8m0XqLujG-@qd?Rg0jb3@Xqw**vTsknB_ z^4a&Hb}PQ7lmI-q`%%|o=~g+?G!L&S(sTKGsobgK5cWf0fpMl@_I(g zRU#wz%l_!(Ba(!pXPn(nfeBnwtZi-MFV>a`p+`iYh2t|R8*?|}>g<*1;j2VccR zutJF=v-b97=2kh5^f@Q54nj1=&X&IU&e)NgB2k&9O-J zz^FmnxiL0v(aAedpDGvY)t*nCA8OU_nYHDlnvgNM{{XC6qSvH-d8af(R0yf3zoPU? zt>oNRe(_cnGq*HNlfp5aRFOH;=B=bRT!6lFbPX-dmIq>nQNkC7Pwq2wKjLM|F8*IKR9#cv z2TZL}0&7RQ!+AE8NZVyA7d?^~E^>VN%#C9oOmv5KFZ8JpNU<9tzoRjy|N9d0RX) zoQ1qZoLh8{zANWMC#dm-ej4R)|5y?p1hqzno5|=yE2_S!HkBM$CzYDMY6BG4@SJ%| z9BYgcwmkMMKqX`Q zHhK0|zKhQ4(`n0NA-eG-K;yWou(XudZvojdz!C;!<1WJ}@pB=}{_18Cwjp*L0j{!} z*18aV_8Lyz>KA5b(3uL`vhOk<_vUYWmuougkG4E8Eg^hJ@~F+K(DUGI&Bqj>SUo_k zb8=_g_?RQ@jRni~LM~fxgR9qPiX8d^VzWlL{f3M-L1p7s8XHIz=th`oCeYQM=D8w= zjf`jl2*9vG4z&cDE-PH1 z>F+!(S_D%mmQBDX#{Uq%!GNb*ge?WEpU4Ny67K)wzvnL7A~Cw>gzlOI&+@n)tvMIX z>=+FZo(Jsy5=R^05!c8=QW4R1cQ*Ss$DnHY@81|3wBj&Z_KntFZX#h}7llDc9#*JR zP?|^A52Re!-zeU4bp{;`@N(5DXmFA?M4=bW@|V-ux6%c~r;R1hmt7xDkyeRpj=r7i zDjj zAu}hNAbQSW(4q~;Q^f-@8QpW4P5SM({FTykl|ZAl$uDvWr_DSl<*zw(whj0A zZIE`&-mvvE)Ovt@CJ2WYEx{lJ;exFlK`tizwx_KX-FVJFp!EEbxm1vHAiXWqvW1Hm z0>x|wLf5_*M}4MByJ5uY0;3RT#0v3JH(nB!tmTJ3!bap$g5yMpWo1kfg@Qi+(TV^8 z5dySFSt65J4socIX701ZG{51j754eMS+k0A#;a=&(qOx%gC&1vwe1B9}Lik|NWn69z7)wZhw^F zC&L_1a6bx5m{fGg{`4WRjcA^rmRezsBM!D3)a5{81q6~>H^g9pmQghSwo=?J!#Dy* zsRUD6Ohs>I3aV@QM>9TMy zEwK}9vgH}p1vsfPM_pb^2JO}YGnJi)dREwvu-)oaDv#ji3)SK-9JasUWTmiJxL_{L zjiJ9~rJ~QE@sl(+Ys@_A>LP4vMw-fMx);y3`r0xKBB~#AO&Dg*LBVI}TcjO5s2Q1u zhmA_?DF0=1>T6YMYgcHc$!cDF8}6jWcQyOOl}}bL#{lgY77uO&Z2v*lev+^d6}vii zXA-$(-*pVG{2E!)xljL))lhm^4bIJ2HrT^TO))n0Svr3YTQ*7agatP29SM9Yl0N#& zl{0eT7~v(VM#tLsQ0&z?>aZC#+tun2+Ej63?96*c%fVG(enNAAJf&`w+x1$Uy6tGO zku(cExhsOZ4&#sxiW?~^!m5^c?l;{=!^Mr$ubRG=z71FA*7e@=3{4hGZuOyAE%mjb zw9rozer}JdD23XP=h>>#nUYuXpF5k=7XKys48-pmG zb02LwfdiBgDYuEK1?W;IP*xg`I@HItXf|Sks1KG zIWPWPH|jpXIl!(sc)ejr+X*oK4GOp$ute$wx5TeFR?l-9=4+ zNS-M5n(%<%MLeF$K4)PWxhO{5oT$HHS6CM>d0D!MN7IPbHLT&tWNX&kgQHz@pDp6} z&4ERrC)x;VS^V#S?f$+Nm2a=_T;0W04I9vW`CjTBY>MD1b1}_Wl`6Jjj%n5?g#Zej zRfI^&G^vN$Ltu`2MCbr4Rc~&@f;FH-tY{?%z?bukTzo1}WG*hZ;m8 z?0Rylzv|MSKdI<9xwDjdNfxe%^T*LFN|X1_9sVD)z3BKfafiZ~RoZS{=O`XB*<;NB ztUf1I%2}+%w|Z#|Q>K1MV1R(j;bjobumBQcW(X;l!epOeD=#rk9`A;1jmj5lU5pI= zz~3^2im%aF;jXL+sKI!Wu?57^`EwKpbEl6Xt02R|pq^|!45>eO5paN-sqCc^4=BE? zM&@5iEJ+iQkan2#$S0C?F%b<;&4z)X?5BHL?=9e15&TC&6zj&0eTk%MChP9Es@90L zk+@QTRN#wF@>+R^H8RhI#ItM_zxwH?N&n~jNsT~h-Q*=4H1kdnU`x}fMYC2Is!sBbB~fo^#JVlBg+334|RbEU@0eyvJ{LQ zM(87mNi6PI_#~Lqz7TvO&&&?z!%0sAkuH)51LCU_l&VEv(!3hQ-Waxq+}!W0)#Peb zOCA2cCe5vh7Jauly);CRYah8<#KHuK6R^?KAxDnPL)8E%Oi6BZjsR1NUR7f;f|PF+ z3e^a+`Okm+ALmBivWEOu!}sl~tp);G)JC^P*P5ExfO9o9^E$$s3UM;z4i#z#iUOhS z)a}U@o0wE$?ol@_sW2lAlgxJ446LQSQg=}bdggj7LaRI6`8>u#54!AqxRF^0(t5kpW)1zjAmgiv3s>D18_2O1qVl$OOYKIMjO0E2`F`S71u7T8m ztEG%Cju{XK8U3bwmO)tc3sBViz?XCThzJY+1ECMQN~ zH^?3d={|;!Smc_9v_D*o~M=i{?RPtA+svQ}%cj37B6iH>4b-nz;W9bwaRK78E6gq^pQSCFKwp zx@Z_3`nGPZ9`!zKienubR;Vf*M!p&Qx&a&O5$N6DD4;^5+*qWP~Mw~ z9wge(MQDDagA?c)=&|DIUEPFVjg6(+pLva4vDzkHHT$iptpl}`MqtV<2}P4$yFrgh z@!V=eav0L|R_a`|sg99RUB~$pJ@xTQ>e0eY&y8nMm?!fwuULVVci_CLUGMEts1==? zG6>GL@R(D7odxWgSFr#=eudQ0?8Q-%^cMsy|3H%7IbO8B;brzC7=C4xg@MQWr?jXGJ5)bRxT2_q>-Ri9l$K9|uuy1J&c4}_Z z>gg?Q_+0z#w<0?oo~YRi2DH@-tiNd8*V2VQ(|!T8kdz7#Rw%#)t5z7myIauA!cFvq zmA=e<2{M}ex(OmbW&N)FB)2mc8QrJ+Q4Y&fF~Gp5Kt;*ik3sDc$a*NOyPJsZ&N)hQ zRaNqxt$T9CSj1ExK@1 zS8CJgfn^GJtiNSnG=NCZctbauwN>A|n`sVLE0J20Ut8?#%-+vOD@j?HN=@*0bZF6` zC>Ee8;>Ymq--sDYwd45L{#|XG=1S{rdYhe)7Pd8~6I|*}tLrSLmMg5?HceZBMdk_) z>4^L{MXa%hU;D8P@t82*FwcMzF-iU=_$jS!z$`JQS_v&l2B2VN4Ta%zWZSv*aPBEC zSW98v(_@X6XKr>2UFg?^BuB-N^Ef4+U?BIFd$4nc7}fJ#qOd5I3XE`>?P7_ zK|9+$h=NC0?bilD^(b&LkpI_)9NwH%Z zjbq-34@Xg$BEa@7h7+oJU2gASqfc)B=#2j|8l!u5G^Vw0f1Z38-{9xB!@K**XmUHe zpM1HYan_dSWLu$444acVu9@=1OTM1aoksH}KyHB@y%B0t%ap5Xx6wJG6~~ArxOYS; zVjonM{1zXtNlY;UR4kM0@%>i){@zbjQquQ+dkJlM&v$B>EM{?XZYdNfGDdoZUyI%G zN!ph$`Xmb6Sc9KB=2VtLC)PqP%V8v{ve$@yaG@4gj}t^I@!{@*o%GLt{J-%X#enhS zE3!`ANRc;m;&;Vi7V+PiMRzWi4* z(B0v)Z5p1h5wf*Tg(lO9ImFEUG)y6v9N8BRiyD8Rkdjn84VnLFYm?kneoY6SGe_#8 zQ3E5|_QI(lr&7eKyT*_gin~S&9U8hO*9HwM;lG2Fn0_9cnkScMvGiKs{#xz2w}CRA zTVV?H4i8c?0Z5GJ{sK;FI0w5q-(>DDnaeMZQ&-BrhC$>#h`V+JvJAVv2lS?eSz_Ok zz^A!wMdi8@@jjG$E#BL=ZO=|acCl%qm#VjYL@Q#vEKL~&Ay$p?3=>(LM3FwPbfR^Z=Z)lGP0|Nc9AJ{)Z1O?x?u9uYzG zCT1D=MGBA$tf_w3Qh(WZX_10iOrOOgp3I4DJgxOM>wx&9gEjkdpSZPIePhInQL* zYgL{ZGiX_34F!pNJerO_)oBT})g(g@eyj~I7=2ojmvpBxSK;|~56@2uWUk_<+n3-L57EKY zYLOq4{&q&a9a`k)I&hapm!JM+^D!WKw59?<#Tc6N-RvHEt1|4){ zvOL3sQZ*y#6Y4+tGuV3vWjb?bxXz5@zbmk78<5eFv5<1uVTEH;*@#z2_7(#oy%kW?@aO zqH*dj2<)49&X$y2|kDA7-w5Q6~lN~h! z>MK4w;%TaP?>lqS=wJsXro~>;H_)EXoUTD^6$D=J4iw_DsK&xL9KDkKX3bai2FAeUZrz*8n0XU#dPS??}`dM(SakaWhM&`|(}{ZjS;G3CyQzy=ko8TdnmXSkO&aF7x#n3* z0xL?A5YZ@k@Pi;xV7rT}(5FRB0V-p@0ML@IM}3VykETOj2vs~&HSY8>0(q?ysTGLa z$OjEK(w3g3T>)H=an|s*C6gPXp(;ADy&$7%QIf*{Nx~HDMdW1(I*MT{VSzmJA5&ih z^ZlTO$X{}7iBo$cEw;dz*3lyypOc0TV)Oba^5Uxkv$x{X7{xA=x>T+)N!5+eFGpZC zAzo$-Eyv63fVfoiVs_`h9or#&iP%<>0(G6*)=EkP{kS2&rp^-Uzu5sppKOLF@_mtq z%k(HLYA?Q4;UFcgOFo%i5M}e?nNw7_TjtNPYU8c4L2FfF6U$ex&)z8M{76RJjnFO8 z9V92S@jfJcBn>m)tZ7|nRpLN|2!+D5sg+Mw?xPz#L3@Q+rh4 zIjZE^eK3fCJ;!O@pWNVtEBlW3WsZi=Z_A?AP>H753~z9%l`|uYC1s1GjoitMjU7p} z;c>Y|Oi+Pq-{XG}DF4BeZ48P3K*}@>bekEa7*XSp_X5QC%Z^HpnuJ*0QP}68FgP0i zSvd3bnu2XdYt`3usm3+Hm6{a4s{GV2C~X?yr54mSR@|{s`$ZH>zSy_EoMnE1S35@# zcWHHrQc=q7Gkxn+7(5@Vb#+oIR133(i#|y45S(L-vhXN-%N*^dRXv0Edd845`62)>!mh2BZdOMuIA`2AV(s1uoxdyvInQx07o#4{9GdcCH zy)xS+S_4Jl!@{Ne8p5zY!2bW0#zBKZ4GZ)->CrR7*0lrHpER$02p= zOC~sW&;gS<3aPe87lcXA_n^uw!7OCK08G&pLYk~+WIpGnx}-=U5nb ztgfBo;E#$ye`d#^R}s_iKRDCc@o}pl&MSn}gD)?=QfrU7qld=Dc5D-@0&GM}!zXz^ zp-PI&t|HIfV^t{|-StKd3d!v9(24^q008M++6!0b)N+Wb@#WaijI=Fb!5%zU<;Pt# z10Fkb;<1i%%xA|g?FwV z37V{~C)D}U$VDdBdk1(xjq|y`^yvX)FX$EZwD`0~i#ZF0eZEfB(z>2hC^44Ad clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - writer.close(); in.close(); - if (shouldReplace) { + if (Boolean.TRUE.equals(shouldReplace)) { File f = new File("/tmp/DOAJ.csv"); f.delete(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java deleted file mode 100644 index 89259d8147..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ /dev/null @@ -1,112 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.util.List; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; - -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; - -public class TestReadCSV { - - @Test - public void testCSVUnibi() throws FileNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); - - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); - - Assertions.assertEquals(36, beans.size()); - Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions - .assertTrue( - beans - .stream() - .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); - Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); - - } - - @Disabled - @Test - public void testCSVUrlUnibi() throws IOException { - - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> - - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } - - @Disabled - @Test - public void testCSVUrlDOAJ() throws IOException { - - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection - .setRequestProperty( - "User-Agent", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } -} From dc8b05b39ef225c9898acc458b916331f239c25b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:18:25 +0200 Subject: [PATCH 140/162] Hosted By Map - changed the association with the datasource id for the hostedby element: there is no more the need to compute it. With the new HBM it is already the id in the graph --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 909e0f0778..efd5d24976 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -231,9 +231,7 @@ object DoiBoostMappingUtil { var hb = new KeyValue if (item != null) { hb.setValue(item.officialname) - hb.setKey(generateDSId(item.id)) - //TODO replace with the one above as soon as the new HBM will be used - //hb.setKey(item.id) + hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) From 3359f73fcfb2ea24aec49681572ef8d606b22af6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:00:42 +0200 Subject: [PATCH 141/162] cleanup & best practices --- .../dnetlib/dhp/common/collection/GetCSV.java | 51 +++++------ .../dhp/common/collection/GetCSVTest.java | 56 ++++++------ .../common/collection/models/CSVProject.java | 5 -- .../project/utils/model/CSVProject.java | 5 -- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../oa/graph/hostedbymap/Aggregators.scala | 10 +-- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 85 +++++++++++++++++++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 75 ---------------- .../SparkApplyHostedByMapToDatasource.scala | 2 +- .../SparkApplyHostedByMapToResult.scala | 4 +- .../SparkPrepareHostedByInfoToApply.scala | 6 +- .../hostedbymap/SparkProduceHostedByMap.scala | 2 +- .../oa/graph/hostedbymap/model/DOAJModel.java | 1 + .../graph/hostedbymap/model/EntityInfo.java | 61 +++++++------ .../hostedbymap/model/UnibiGoldModel.java | 21 +++-- .../graph/hostedbymap/oozie_app/workflow.xml | 6 +- .../dhp/oa/graph/hostedbymap/TestApply.scala | 4 +- .../oa/graph/hostedbymap/TestPrepare.scala | 14 +-- .../hostedbymap/iteminfofromhostedbymap.json | 40 ++++----- .../hostedbymap/iteminfofromhostedbymap2.json | 40 ++++----- .../graph/hostedbymap/iteminfofrompublication | 4 +- .../hostedbymap/iteminfofrompublication2 | 4 +- .../oa/graph/hostedbymap/preparedInfo.json | 2 +- .../oa/graph/hostedbymap/preparedInfo2.json | 6 +- .../graph/hostedbymap/unibi_transformed.json | 58 ++++++------- 25 files changed, 281 insertions(+), 283 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java index 44f4121ebe..44e19142cb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -2,17 +2,9 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.List; -import java.util.Optional; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -20,17 +12,19 @@ import org.apache.hadoop.fs.Path; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - public class GetCSV { - public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, - String modelClass) throws IOException, ClassNotFoundException { - getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + public static final char DEFAULT_DELIMITER = ','; + private GetCSV() { } public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER); + } + + public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath, String modelClass, char delimiter) throws IOException, ClassNotFoundException { Path hdfsWritePath = new Path(hdfsPath); @@ -40,26 +34,23 @@ public class GetCSV { } fsDataOutputStream = fileSystem.create(hdfsWritePath); - try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) { - ObjectMapper mapper = new ObjectMapper(); + final ObjectMapper mapper = new ObjectMapper(); - new CsvToBeanBuilder(reader) - .withType(Class.forName(modelClass)) - .withSeparator(delimiter) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + @SuppressWarnings("unchecked") + final List lines = new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse(); + + for (Object line : lines) { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } } - - } } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index d24083e0d7..c45f838288 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; -import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -20,6 +19,7 @@ import eu.dnetlib.dhp.common.collection.models.CSVProgramme; import eu.dnetlib.dhp.common.collection.models.CSVProject; import eu.dnetlib.dhp.common.collection.models.DOAJModel; import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; +import jdk.nashorn.internal.ir.annotations.Ignore; public class GetCSVTest { @@ -34,11 +34,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -49,39 +49,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -225,14 +225,14 @@ public class GetCSVTest { } if (count == 7904) { System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("", doaj.getIssn()); Assertions.assertEquals("2055-7159", doaj.getEissn()); Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } if (count == 16707) { - Assertions.assertEquals("",doaj.getIssn()); - Assertions.assertEquals("2788-6298",doaj.getEissn()); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); Assertions .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 0d939ca0ec..62c847907a 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java index 73cea0539f..3ddb196363 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 68aacdc8b5..680872126f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest { Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count()); - //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); + // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8198b197aa..ce383292c1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -78,11 +78,11 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ - b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + if(!b1.getHostedById.equals("")){ + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) return b1 } - b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) b2 } @@ -116,7 +116,7 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ + if(!b1.getHostedById.equals("")){ return b1 } b2 @@ -131,7 +131,7 @@ object Aggregators { def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { val transformedData : Dataset[EntityInfo] = df - .groupByKey(_.getHb_id)(Encoders.STRING) + .groupByKey(_.getHostedById)(Encoders.STRING) .agg(Aggregators.datasourceToSingleIdAggregator) .map{ case (id:String , res: EntityInfo) => res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java new file mode 100644 index 0000000000..c8329c99cb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -0,0 +1,85 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + final HttpConnector2 connector2 = new HttpConnector2(); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + final Path path = new Path(workingPath + "/replaced.csv"); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (FSDataOutputStream fos = fileSystem.create(path, true)) { + String line; + while ((line = in.readLine()) != null) { + fos.writeUTF(line.replace("\\\"", "\"")); + fos.writeUTF("\n"); + } + } + } + + try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { + GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java deleted file mode 100644 index d7d3698193..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class GetCSV { - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("workingPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final String delimiter = Optional - .ofNullable(parser.get("delimiter")) - .orElse(null); - final Boolean shouldReplace = Optional - .ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - char del = ';'; - if (delimiter != null) { - del = delimiter.charAt(0); - } - - HttpConnector2 connector2 = new HttpConnector2(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL))); - - if (Boolean.TRUE.equals(shouldReplace)) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/replaced.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - - in.close(); - in = new BufferedReader(new FileReader("/tmp/replaced.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fileSystem, in, hdfsPath, classForName, del); - - in.close(); - if (Boolean.TRUE.equals(shouldReplace)) { - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index ae1454b479..4245678309 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -15,7 +15,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkApplyHostedByMapToDatasource { def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { - dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") .map(t2 => { val d: Datasource = t2._1 if (t2._2 != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 533e439b7f..8b0b455134 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -26,9 +26,9 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) + inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) - if (ei.getOpenaccess) { + if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 6db0ad33db..b7a7d352f2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -64,13 +64,13 @@ object SparkPrepareHostedByInfoToApply { } def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { - Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 if(t2._2 != null ){ val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) res.setName(ds.getName) } res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index de41ebf28f..d0c603e297 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -111,7 +111,7 @@ object SparkProduceHostedByMap { def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { - return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true) + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssnL, true) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java index ba804b939d..4b5dc22a61 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -6,6 +6,7 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") private String journalTitle; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 1facaa7923..1c6c88fe7a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -5,10 +5,25 @@ import java.io.Serializable; public class EntityInfo implements Serializable { private String id; - private String journal_id; + private String journalId; private String name; - private Boolean openaccess; - private String hb_id; + private Boolean openAccess; + private String hostedById; + + public static EntityInfo newInstance(String id, String journalId, String name) { + return newInstance(id, journalId, name, false); + + } + + public static EntityInfo newInstance(String id, String journalId, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journalId = journalId; + pi.name = name; + pi.openAccess = openaccess; + pi.hostedById = ""; + return pi; + } public String getId() { return id; @@ -18,12 +33,12 @@ public class EntityInfo implements Serializable { this.id = id; } - public String getJournal_id() { - return journal_id; + public String getJournalId() { + return journalId; } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; + public void setJournalId(String journalId) { + this.journalId = journalId; } public String getName() { @@ -34,35 +49,19 @@ public class EntityInfo implements Serializable { this.name = name; } - public Boolean getOpenaccess() { - return openaccess; + public Boolean getOpenAccess() { + return openAccess; } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; } - public String getHb_id() { - return hb_id; + public String getHostedById() { + return hostedById; } - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } - - public static EntityInfo newInstance(String id, String j_id, String name) { - return newInstance(id, j_id, name, false); - - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - + public void setHostedById(String hostedById) { + this.hostedById = hostedById; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java index 0927a136be..8f24cd6154 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -6,14 +6,15 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") private String issn; @CsvBindByName(column = "ISSN_L") - private String issn_l; + private String issnL; @CsvBindByName(column = "TITLE") private String title; @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; + private String titleSource; public String getIssn() { return issn; @@ -23,8 +24,12 @@ public class UnibiGoldModel implements Serializable { this.issn = issn; } - public String getIssn_l() { - return issn_l; + public String getIssnL() { + return issnL; + } + + public void setIssnL(String issnL) { + this.issnL = issnL; } public String getTitle() { @@ -35,11 +40,11 @@ public class UnibiGoldModel implements Serializable { this.title = title; } - public String getTitle_source() { - return title_source; + public String getTitleSource() { + return titleSource; } - public void setTitle_source(String title_source) { - this.title_source = title_source; + public void setTitleSource(String titleSource) { + this.titleSource = titleSource; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 26b1d59931..d7b85b0cbc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,10 +100,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold + --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -112,10 +113,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj + --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel --replacetrue diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index a48e527020..1bdcb60aaf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -117,14 +117,14 @@ class TestApply extends java.io.Serializable{ val pb : Datasource = t2._1 val pa : Datasource = t2._2 assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) - }else{ + } else { assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index efd598fef7..a3a753a8ab 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -75,8 +75,8 @@ class TestPrepare extends java.io.Serializable{ assertEquals(2, ds.count) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) spark.close() } @@ -109,10 +109,10 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) - assertEquals("0001-396X", ei.getJournal_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) + assertEquals("0001-396X", ei.getJournalId) assertEquals("Academic Therapy", ei.getName) - assertTrue(!ei.getOpenaccess) + assertTrue(!ei.getOpenAccess) spark.close() } @@ -145,9 +145,9 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) assertEquals("Academic Therapy", ei.getName) - assertTrue(ei.getOpenaccess) + assertTrue(ei.getOpenAccess) ds.foreach(e => println(mapper.writeValueAsString(e))) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json index dfb95816ea..889daae479 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"1434-0860","name":"Forschung im Ingenieurwesen","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json index 3d2f2e0050..b84f4eb861 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication index 6ee4f94be7..e67c05cd0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1728-5852","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 index 59bd32d4b0..e11311d0a3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json index d9c2010821..e037c18588 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -1 +1 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 64e2433edb..846be762a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,3 +1,3 @@ -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json index d4acba4a9c..c4ac62ff58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -1,29 +1,29 @@ -{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} -{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} -{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} -{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} -{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} -{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} -{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} -{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} -{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} -{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} -{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} -{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} -{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} -{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival studies.","title_source":"ROAD"} -{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} -{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} -{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} -{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} -{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} -{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} -{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} -{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} -{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} -{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} -{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} -{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} -{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} -{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} -{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file +{"issn":"2502-731X","issnL":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","titleSource":"ROAD"} +{"issn":"2502-7409","issnL":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","titleSource":"ROAD"} +{"issn":"2502-7433","issnL":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","titleSource":"ROAD"} +{"issn":"2502-745X","issnL":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","titleSource":"ROAD"} +{"issn":"2502-7549","issnL":"2502-7549","title":"ELang journal (An English Education journal)","titleSource":"ROAD"} +{"issn":"2423-3633","issnL":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-5563","issnL":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","titleSource":"ROAD"} +{"issn":"1735-434X","issnL":"1735-434X","title":"Iranian journal of animal biosystematics.","titleSource":"ROAD"} +{"issn":"2423-4435","issnL":"2008-6113","title":"Majallah-i jangal-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-4575","issnL":"2423-4575","title":"Ābziyān-i zinatī.","titleSource":"ROAD"} +{"issn":"2423-4974","issnL":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","titleSource":"ROAD"} +{"issn":"2380-0607","issnL":"2380-0607","title":"AIHM journal club.","titleSource":"ROAD"} +{"issn":"1085-4568","issnL":"1085-4568","title":"Frontiers.","titleSource":"ROAD"} +{"issn":"2380-8845","issnL":"2380-8845","title":"˜The œjournal of contemporary archival studies.","titleSource":"ROAD"} +{"issn":"2381-1803","issnL":"2381-1803","title":"International journal of complementary & alternative medicine.","titleSource":"ROAD"} +{"issn":"2381-2478","issnL":"2381-2478","title":"Palapala.","titleSource":"ROAD"} +{"issn":"2382-5170","issnL":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","titleSource":"ROAD"} +{"issn":"2382-9737","issnL":"2382-9737","title":"Majallah-i salāmat va bihdāsht","titleSource":"ROAD"} +{"issn":"2382-977X","issnL":"2382-977X","title":"UCT journal of research in science ,engineering and technology","titleSource":"ROAD"} +{"issn":"2382-9974","issnL":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","titleSource":"ROAD"} +{"issn":"2227-4782","issnL":"2227-4782","title":"Problemi endokrinnoï patologìï.","titleSource":"ROAD"} +{"issn":"2685-0079","issnL":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","titleSource":"ROAD"} +{"issn":"2574-0075","issnL":"2574-0075","title":"Hypermedia magazine.","titleSource":"ROAD"} +{"issn":"2574-0296","issnL":"2574-0296","title":"˜The œmuseum review.","titleSource":"ROAD"} +{"issn":"2574-0334","issnL":"2574-0334","title":"Bioactive compounds in health and disease.","titleSource":"ROAD"} +{"issn":"2574-108X","issnL":"2574-108X","title":"Journal of computer science integration.","titleSource":"ROAD"} +{"issn":"2574-254X","issnL":"2574-254X","title":"Child and adolescent obesity.","titleSource":"ROAD"} +{"issn":"2574-3325","issnL":"2574-3325","title":"Journal of research on the college president.","titleSource":"ROAD"} +{"issn":"2239-6101","issnL":"2239-5938","title":"European journal of sustainable development.","titleSource":"ROAD"} \ No newline at end of file From baed5e3337def46cef839a4010c2b4715d8e3f81 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:14:47 +0200 Subject: [PATCH 142/162] test classes moved in specific components --- .../dhp/common/collection/GetCSVTest.java | 246 ------------------ .../collection/models/CSVProgramme.java | 80 ------ .../common/collection/models/CSVProject.java | 46 ---- .../common/collection/models/DOAJModel.java | 52 ---- .../collection/models/UnibiGoldModel.java | 45 ---- .../project/DownloadCsvTest.java | 145 +++++++++++ .../oa/graph/hostedbymap/DownloadCsvTest.java | 139 ++++++++++ 7 files changed, 284 insertions(+), 469 deletions(-) delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java deleted file mode 100644 index c45f838288..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ /dev/null @@ -1,246 +0,0 @@ - -package eu.dnetlib.dhp.common.collection; - -import java.io.*; -import java.nio.file.Files; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.common.collection.models.CSVProgramme; -import eu.dnetlib.dhp.common.collection.models.CSVProject; -import eu.dnetlib.dhp.common.collection.models.DOAJModel; -import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; -import jdk.nashorn.internal.ir.annotations.Ignore; - -public class GetCSVTest { - - private static String workingDir; - - private static LocalFileSystem fs; - - @Disabled - @Test - void getProgrammeFileTest() throws Exception { - - String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); - if (count == 0) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); - Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); - Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); - Assertions.assertTrue(csvp.getShortTitle().equals("")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - if (count == 28) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); - Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); - Assertions.assertTrue(csvp.getLanguage().equals("de")); - } - if (count == 229) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); - Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - Assertions.assertTrue(csvp.getCode() != null); - Assertions.assertTrue(csvp.getCode().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(767, count); - } - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs = FileSystem.getLocal(new Configuration()); - } - - @Disabled - @Test - void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { - String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; - // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; - - GetCSV - .getCsv( - fs, - new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) - // new BufferedReader(new FileReader(fileURL)) - , workingDir + "/projects", - "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); - if (count == 0) { - Assertions.assertTrue(csvp.getId().equals("771736")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); - Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); - - } - if (count == 22882) { - Assertions.assertTrue(csvp.getId().equals("752903")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); - Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); - } - if (count == 223023) { - Assertions.assertTrue(csvp.getId().equals("861952")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); - Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); - } - Assertions.assertTrue(csvp.getId() != null); - Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(34957, count); - } - - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); - Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); - Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); - - } - if (count == 43158) { - Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); - Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); - Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - - } - if (count == 67027) { - Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); - Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); - Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } - - count += 1; - } - - Assertions.assertEquals(67028, count); - } - - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } - - count += 1; - } - - Assertions.assertEquals(16713, count); - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java deleted file mode 100644 index d17fcc75ef..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java +++ /dev/null @@ -1,80 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; -import com.opencsv.bean.CsvIgnore; - -/** - * The model for the programme csv file - */ -public class CSVProgramme implements Serializable { - - @CsvBindByName(column = "code") - private String code; - - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "shortTitle") - private String shortTitle; - - @CsvBindByName(column = "language") - private String language; - - @CsvIgnore - private String classification; - - @CsvIgnore - private String classification_short; - - public String getClassification_short() { - return classification_short; - } - - public void setClassification_short(String classification_short) { - this.classification_short = classification_short; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getShortTitle() { - return shortTitle; - } - - public void setShortTitle(String shortTitle) { - this.shortTitle = shortTitle; - } - - public String getLanguage() { - return language; - } - - public void setLanguage(String language) { - this.language = language; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java deleted file mode 100644 index 62c847907a..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - - @CsvBindByName(column = "id") - private String id; - - @CsvBindByName(column = "programme") - private String programme; - - @CsvBindByName(column = "topics") - private String topics; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java deleted file mode 100644 index 156ba36320..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java +++ /dev/null @@ -1,52 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java deleted file mode 100644 index ae47262bf2..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java +++ /dev/null @@ -1,45 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java new file mode 100644 index 0000000000..700b9e632c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java @@ -0,0 +1,145 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + CSVProgramme.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + assertTrue(csvp.getShortTitle().equals("")); + assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + assertTrue(csvp.getLanguage().equals("en")); + } + assertTrue(csvp.getCode() != null); + assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + , workingDir + "/projects", + CSVProject.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + assertTrue(csvp.getId().equals("771736")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + assertTrue(csvp.getId().equals("752903")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + assertTrue(csvp.getId().equals("861952")); + assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + assertTrue(csvp.getId() != null); + assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java new file mode 100644 index 0000000000..b93320e1a7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -0,0 +1,139 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.*; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + UnibiGoldModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + DOAJModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + + } + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { + + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); + Assertions + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); + } + + count += 1; + } + + Assertions.assertEquals(16713, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} From c3ad4ab701af352805b776a23ee9db7c426adab9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:23:15 +0200 Subject: [PATCH 143/162] minor fixes --- .../oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 4 ++-- .../oa/graph/hostedbymap/download_csv_parameters.json | 9 +++------ .../dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 6 ++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index d0c603e297..1ee1d5d1ab 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -209,8 +209,8 @@ object SparkProduceHostedByMap { Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index fba048343b..cf417c6757 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -1,13 +1,10 @@ - [ - { "paramName":"fu", "paramLongName":"fileURL", "paramDescription": "the url to download the csv file ", "paramRequired": true }, - { "paramName":"wp", "paramLongName":"workingPath", @@ -27,9 +24,9 @@ "paramRequired": true }, { - "paramName": "sr", - "paramLongName": "replace", - "paramDescription": "true if the input file has to be cleaned before parsing", + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "csv delimiter character", "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index d7b85b0cbc..870b4ba0f2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -78,7 +78,6 @@ - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -92,7 +91,6 @@ - @@ -100,7 +98,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -113,7 +111,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 7ee2757fcd97d2d842638f4e52290df8217f465b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:41:01 +0200 Subject: [PATCH 144/162] fixed DownloadCSV parameters spec; workflow patching the hostedby replaces the graph content (publication, datasource) rather than creating a copy --- .../SparkApplyHostedByMapToDatasource.scala | 13 ++++++++----- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 7 +++++-- .../graph/hostedbymap/download_csv_parameters.json | 6 ++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 4245678309..1b18ba3ae4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -27,8 +27,8 @@ object SparkApplyHostedByMapToDatasource { d })(Encoders.bean((classOf[Datasource]))) } - def main(args: Array[String]): Unit = { + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() @@ -41,18 +41,15 @@ object SparkApplyHostedByMapToDatasource { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") val preparedInfoPath = parser.get("preparedInfoPath") - implicit val formats = DefaultFormats - implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") @@ -62,6 +59,12 @@ object SparkApplyHostedByMapToDatasource { .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 8b0b455134..db7ab4ac07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -75,8 +75,11 @@ object SparkApplyHostedByMapToResult { applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) - - + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index cf417c6757..22c65eb359 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -11,6 +11,12 @@ "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", "paramRequired": true }, + { + "paramName":"of", + "paramLongName":"outputFile", + "paramDescription": "the output json file produced by the CSV downlaod procedure", + "paramRequired": true + }, { "paramName": "hnn", "paramLongName": "hdfsNameNode", From 17cefe6a97edd8434ce806af5064ce1b766adedd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:43:59 +0200 Subject: [PATCH 145/162] [HBM] removed stale replace option --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 870b4ba0f2..a03f4e36ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -117,7 +117,6 @@ --workingPath${workingDir}/doaj --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel - --replacetrue From 5f0903d50d6abff7ae5ae98a71b53d0045d62ae8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 14:17:54 +0200 Subject: [PATCH 146/162] fixed CSV downloader & tests --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 26 ++++++++---- .../oa/graph/hostedbymap/DownloadCsvTest.java | 42 ++++++++----------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index c8329c99cb..be35d31f85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap; import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; +import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -56,30 +59,37 @@ public class DownloadCSV { .orElse(DEFAULT_DELIMITER); log.info("delimiter {}", delimiter); - final HttpConnector2 connector2 = new HttpConnector2(); - Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); + + new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + + } + + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { + + final HttpConnector2 connector2 = new HttpConnector2(); + final Path path = new Path(workingPath + "/replaced.csv"); try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (FSDataOutputStream fos = fileSystem.create(path, true)) { + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { String line; while ((line = in.readLine()) != null) { - fos.writeUTF(line.replace("\\\"", "\"")); - fos.writeUTF("\n"); + writer.write(line.replace("\\\"", "\"")); + writer.newLine(); } } } - try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { - GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index b93320e1a7..7b02025fb2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -39,14 +39,16 @@ public class DownloadCsvTest { String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - UnibiGoldModel.class.getName()); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; @@ -82,24 +84,16 @@ public class DownloadCsvTest { String fileURL = "https://doaj.org/csv"; - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - DOAJModel.class.getName()); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; From f74adc475275b32abb4a66a6ae3844fd566f4bff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 15:52:15 +0200 Subject: [PATCH 147/162] added DownloadCSV2 as alternative implementation of the same download procedure --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 14 +- .../oa/graph/hostedbymap/DownloadCSV2.java | 84 +++++++ .../hostedbymap/download_csv_parameters.json | 6 +- .../graph/hostedbymap/oozie_app/workflow.xml | 13 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 210 ++++++++++-------- 5 files changed, 214 insertions(+), 113 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index be35d31f85..dff761c34e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -7,16 +7,15 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; -import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.GetCSV; import eu.dnetlib.dhp.common.collection.HttpConnector2; @@ -68,8 +67,9 @@ public class DownloadCSV { } - protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException, CollectorException { + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { final HttpConnector2 connector2 = new HttpConnector2(); @@ -78,11 +78,11 @@ public class DownloadCSV { try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { + try (PrintWriter writer = new PrintWriter( + new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { String line; while ((line = in.readLine()) != null) { - writer.write(line.replace("\\\"", "\"")); - writer.newLine(); + writer.println(line.replace("\\\"", "\"")); } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java new file mode 100644 index 0000000000..d82d008629 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV2 { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String tmpFile = parser.get("tmpFile"); + log.info("tmpFile {}", tmpFile); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + HttpConnector2 connector2 = new HttpConnector2(); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); + } finally { + FileUtils.deleteQuietly(new File(tmpFile)); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index 22c65eb359..50fbb00f0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -6,9 +6,9 @@ "paramRequired": true }, { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramName":"tf", + "paramLongName":"tmpFile", + "paramDescription": "the temporary local file storing the cleaned CSV contents for unibi gold list and doj artciles", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index a03f4e36ae..84035fe4ef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -73,7 +73,8 @@ - ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'download_csv'} @@ -98,10 +99,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${unibiFileURL} - --workingPath${workingDir}/unibi_gold + --tmpFile/tmp/unibi_gold_replaced.csv --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -111,10 +112,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${doajFileURL} - --workingPath${workingDir}/doaj + --tmpFile/tmp/doaj_replaced.csv --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel @@ -141,7 +142,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --datasourcePath${sourcePath}/datasource - --workingPath${workingDir} + --workingPath/user/${wf:user()}/data --outputPath${hostedByMapPath} --masteryarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index 7b02025fb2..edf74fc6a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -1,133 +1,149 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; -import java.nio.file.Files; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.assertTrue; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; public class DownloadCsvTest { - private static String workingDir; + private static final Logger log = LoggerFactory.getLogger(DownloadCsvTest.class); - private static LocalFileSystem fs; + private static String workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(DownloadCsvTest.class.getSimpleName()) - .toString(); + private static LocalFileSystem fs; - fs = FileSystem.getLocal(new Configuration()); - } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + fs = FileSystem.getLocal(new Configuration()); + } - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/unibi_gold.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/unibi_gold", - outputFile, - UnibiGoldModel.class.getName(), - ',', - fs); + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - assertTrue(unibi.getIssn().equals("0001-625X")); - assertTrue(unibi.getIssnL().equals("0001-625X")); - assertTrue(unibi.getTitle().equals("Acta Mycologica")); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 43158) { - assertTrue(unibi.getIssn().equals("2088-6330")); - assertTrue(unibi.getIssnL().equals("2088-6330")); - assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); - } - if (count == 67027) { - assertTrue(unibi.getIssn().equals("2658-7068")); - assertTrue(unibi.getIssnL().equals("2308-2488")); - assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - count += 1; - } + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } - Assertions.assertEquals(67028, count); - } + count += 1; + } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + assertEquals(67028, count); + } - String fileURL = "https://doaj.org/csv"; + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); + String fileURL = "https://doaj.org/csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + assertEquals("0001-3765", doaj.getIssn()); + assertEquals("1678-2690", doaj.getEissn()); + assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + } + if (count == 22) { + log.info(new ObjectMapper().writeValueAsString(doaj)); + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + } + if (count == 7904) { + // log.info(new ObjectMapper().writeValueAsString(doaj)); + assertEquals("", doaj.getIssn()); + assertEquals("2055-7159", doaj.getEissn()); + assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } + assertEquals("2783-1043", doaj.getIssn()); + assertEquals("2783-1051", doaj.getEissn()); + assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); + } - count += 1; - } + count += 1; + } - Assertions.assertEquals(16713, count); - } + assertEquals(16715, count); + } - @AfterAll - public static void cleanup() { - FileUtils.deleteQuietly(new File(workingDir)); - } + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } } From 7743d0f9193e2fc9837f7e101ed25e398296b8c3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 16:14:54 +0200 Subject: [PATCH 148/162] consolidated dnet wf profiles into the same submodule --- .../dedup}/dataset_dedup_configuration.xml | 0 ...herresearchproduct_dedup_configuration.xml | 0 .../publication_dedup_configuration.xml | 0 .../result_deduplication_orchestrator.xml | 0 .../dedup}/software_dedup_configuration.xml | 0 .../dnetlib/dhp/provision/02_beta_graph.xml | 25 + dhp-workflows/dhp-workflow-profiles/pom.xml | 27 - .../wf/profiles/graph_beta_construction.xml | 779 ------------------ .../wf/profiles/graph_prod_construction.xml | 705 ---------------- .../dhp/wf/profiles/graph_to_hiveDB.xml | 73 -- .../dnetlib/dhp/wf/profiles/update_solr.xml | 98 --- .../dnetlib/dhp/wf/profiles/update_stats.xml | 74 -- 12 files changed, 25 insertions(+), 1756 deletions(-) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/software_dedup_configuration.xml (100%) delete mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 766783f8bb..344e99044b 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -660,6 +660,31 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_provision/working_dir/hostedbymap' + } + + build-report + diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml deleted file mode 100644 index 54e76c1e29..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - dhp-workflows - eu.dnetlib.dhp - 1.2.4-SNAPSHOT - - 4.0.0 - - dhp-workflow-profiles - jar - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml deleted file mode 100644 index 08ed24cd07..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml +++ /dev/null @@ -1,779 +0,0 @@ - -
- - - - - -
- - Graph Construction [HYBRID] - Data Provision - 30 - - - - reuse cached content from the PROD aggregation system - - reuseProdContent - true - - - - - - - - set the PROD aggregator content path - - prodContentPath - /tmp/core_aggregator - - - - - - - - Set the path containing the PROD AGGREGATOR graph - - prodAggregatorGraphPath - /tmp/core_provision/graph/00_prod_graph_aggregator - - - - - - - - reuse cached content from the BETA aggregation system - - reuseBetaContent - true - - - - - - - - set the BETA aggregator content path - - betaContentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the BETA AGGREGATOR graph - - betaAggregatorGraphPath - /tmp/core_provision/graph/00_beta_graph_aggregator - - - - - - - - Set the IS lookup service address - - isLookUpUrl - http://services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the target path to store the MERGED graph - - mergedGraphPath - /tmp/core_provision/graph/01_graph_merged - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/core_provision/graph/02_graph_raw - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/core_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/core_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/core_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/core_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/core_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/core_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/core_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/core_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/core_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/core_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/core_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/core_provision/graph/14_graph_blacklisted - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'betaAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseBetaContent', - 'contentPath' : 'betaContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/beta_aggregator' - } - - build-report - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'prodAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseProdContent', - 'contentPath' : 'prodContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/prod_aggregator' - } - - build-report - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'betaInputGgraphPath' : 'betaAggregatorGraphPath', - 'prodInputGgraphPath' : 'prodAggregatorGraphPath', - 'graphOutputPath' : 'mergedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/merge/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/merge_graph' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'mergedGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'rawGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/core_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml deleted file mode 100644 index 4eab12c73a..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml +++ /dev/null @@ -1,705 +0,0 @@ - -
- - - - - -
- - Graph Construction [PROD] - Data Provision - 30 - - - - reuse cached content from the aggregation system - - reuseContent - true - - - - - - - - set the aggregator content path - - contentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the AGGREGATOR graph - - aggregatorGraphPath - /tmp/beta_provision/graph/00_graph_aggregator - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/beta_provision/graph/01_graph_raw - - - - - - - - Set the target path to store the first CLEANED graph - - firstCleanedGraphPath - /tmp/prod_provision/graph/02_graph_first_cleaned - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/beta_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/beta_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/beta_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/beta_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/beta_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/beta_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/beta_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/beta_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/beta_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/beta_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/beta_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/beta_provision/graph/14_graph_blacklisted - - - - - - - - Set the lookup address - - isLookUpUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'aggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseContent', - 'contentPath' : 'contentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'aggregatorGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'rawGraphPath', - 'graphOutputPath': 'firstCleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/first_clean' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'firstCleanedGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/beta_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml deleted file mode 100644 index 0ace12ea38..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml +++ /dev/null @@ -1,73 +0,0 @@ - -
- - - - - -
- - Graph to HiveDB [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputPath - - - - - - - - Set the target path to store the RAW graph - - hiveDbName - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputPath' : 'inputPath', - 'hiveDbName' : 'hiveDbName' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/hive/oozie_app' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml deleted file mode 100644 index 8a7738bcfa..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml +++ /dev/null @@ -1,98 +0,0 @@ - -
- - - - - -
- - Update Solr [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputGraphRootPath - - - - - - - - Set the target path to store the RAW graph - - format - TMF - - - - - - - Set the lookup address - - isLookupUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputGraphRootPath' : 'inputGraphRootPath', - 'isLookupUrl' : 'isLookupUrl', - 'format' : 'format' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/provision/oozie_app', - 'maxRelations' : '100', - 'relPartitions' : '3000', - 'batchSize' : '2000', - 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments', - 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', - 'resumeFrom' : 'prepare_relations', - 'sparkDriverMemoryForJoining' : '3G', - 'sparkExecutorMemoryForJoining' : '7G', - 'sparkExecutorCoresForJoining' : '4', - 'sparkDriverMemoryForIndexing' : '2G', - 'sparkExecutorMemoryForIndexing' : '2G', - 'sparkExecutorCoresForIndexing' : '64', - 'sparkNetworkTimeout' : '600', - 'workingDir' : '/tmp/beta_provision/working_dir/update_solr' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml deleted file mode 100644 index a91b6302e1..0000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml +++ /dev/null @@ -1,74 +0,0 @@ - -
- - - - - -
- - Update Stats [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - openaire_db_name - - - - - - - - Set the target path to store the RAW graph - - stats_db_name - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'openaire_db_name' : 'openaire_db_name', - 'stats_db_name' : 'stats_db_name' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/stats/oozie_app', - 'hive_timeout' : '3000' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file From ed7e28490a2cf0c08a7b67240b28bfebe6f290cc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:19:01 +0200 Subject: [PATCH 149/162] change in sh --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 2ce7042835..bb7ec0b45a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 6fec71e8d2bb2d27b73d83cc273bd20b2465ceb6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:39:02 +0200 Subject: [PATCH 150/162] removed the specific of the infra we are running the wf from the wf name --- .../eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml | 2 +- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 3700ce5d97..ab3b9593ed 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 831ff5a579..f5596b60ee 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory From bc7068106c9b5ae1737efcd2556da267f326541a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:19:44 +0200 Subject: [PATCH 151/162] added crossref download oozie workflow --- .../oozie_app/config-default.xml | 42 ++++++ .../oozie_app/workflow.xml | 128 ++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml new file mode 100644 index 0000000000..508202e301 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index e69de29bb2..6e4f179128 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -0,0 +1,128 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + crossrefdumptoken + the token for the API dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + ${crossrefdumptoken} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + \ No newline at end of file From 82086f3422955934561a65ef5d8b8526ffbbfce4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:42:14 +0200 Subject: [PATCH 152/162] fixed directory name --- .../eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml | 0 .../dnetlib/dhp/actionmanager/actionset_h2020_classification.xml | 0 .../eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml | 0 .../main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml | 0 .../eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml | 0 .../dhp/dedup/otherresearchproduct_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml | 0 .../eu/dnetlib/dhp/dedup/software_dedup_configuration.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml (100%) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml From 7c0c67bdd66e602bbc169358bd3a678f0edee931 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:45:53 +0200 Subject: [PATCH 153/162] added mock pom --- dhp-workflows/dhp-workflow-profiles/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml new file mode 100644 index 0000000000..b1c51c497d --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -0,0 +1,12 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + + dhp-workflow-profiles + + \ No newline at end of file From e5cf11d088655b2dfd3ca76781bb820aa847fbbc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 19 Aug 2021 10:29:04 +0200 Subject: [PATCH 154/162] change open access route to result matching hbm to gold --- .../oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index db7ab4ac07..0e047d0169 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -30,7 +30,7 @@ object SparkApplyHostedByMapToResult { inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) - inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } From 812bd54c57d63994fde566bb4f6a41bf1cee5d2f Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:30:14 +0200 Subject: [PATCH 155/162] different funders in blacklist from BETA and PROD aggregator --- .../dhp/provision/00_beta_graph_for_IIS.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index ef2205e322..2fed35f448 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,10 +11,20 @@ IIS 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -375,7 +385,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -421,7 +431,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From a053e1513cf489f997c88d583d2d59a8a10ae378 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:32:27 +0200 Subject: [PATCH 156/162] different funders in blacklist from BETA and PROD aggregator --- .../dnetlib/dhp/provision/02_beta_graph.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 344e99044b..f83337b3cc 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,10 +11,20 @@ Data Provision 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -497,7 +507,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -543,7 +553,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From 65822400ce9fc1597a1f7bc5674c536047c55dfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:35 +0200 Subject: [PATCH 157/162] CrossrefDump - added new parameter file that was missing --- .../generate_dataset_params.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json new file mode 100644 index 0000000000..d903ebe1d9 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json @@ -0,0 +1,16 @@ +[ + {"paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the source path", + "paramRequired": true}, + {"paramName":"m", + "paramLongName":"master", + "paramDescription": "the master name", + "paramRequired": true}, + {"paramName":"t", + "paramLongName":"targetPath", + "paramDescription": "the target path", + "paramRequired": true} + +] + From f3b6c392c1655511464f388824ff80ca564410f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:58 +0200 Subject: [PATCH 158/162] CrossrefDump - moving parameter file under folder crossref_dump_reader --- .../doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json (100%) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json From 35880c0e7b35b808c612dfe7efcde4e091d7fd35 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:11:35 +0200 Subject: [PATCH 159/162] CrossrefDump - changed the wf to be able to resume from one of the steps --- .../crossref_dump_reader/oozie_app/workflow.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 6e4f179128..c915783aa6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -41,6 +41,16 @@ + + + + + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} + ${wf:conf('resumeFrom') eq 'Unpack'} + + + + From 45c62609af435e1fbd93690a8121261476befa8d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:31 +0200 Subject: [PATCH 160/162] CrossrefDump - modified because parameter file was moved --- .../eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7d3770a04..d1861ff0a7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -26,7 +26,7 @@ public class ExtractCrossrefRecords { .toString( ExtractCrossrefRecords.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); From 882abb40e466a2232a879b9113a0c28b2362848b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:53 +0200 Subject: [PATCH 161/162] CrossrefDump - --- .../dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index c915783aa6..4a98a4bb44 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -51,9 +51,6 @@ - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 4c1474e6938cdd9cef37cec895b8aa232ac59758 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 20 Aug 2021 17:03:30 +0200 Subject: [PATCH 162/162] Dealing with #6859#note-2: we have to decode URLs to avoid & and other chars encoded becasue of the original XML representation of data --- .../dhp/oa/graph/raw/OafToOafMapper.java | 9 +++ .../dhp/oa/graph/raw/OdfToOafMapper.java | 18 ++++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 35 +++++++++ .../dnetlib/dhp/oa/graph/raw/encoded-url.xml | 40 ++++++++++ .../dhp/oa/graph/raw/encoded-url_odf.xml | 75 +++++++++++++++++++ 5 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 2b49a9dc18..5f94910294 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -164,6 +166,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .filter(n -> StringUtils.isNotBlank(n.getText())) .map(n -> n.getText().trim()) .filter(u -> u.startsWith("http")) + .map(s -> { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return s; + } + }) .distinct() .collect(Collectors.toCollection(ArrayList::new))); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 02aab4f162..a365ac9aa1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.*; import java.util.stream.Collectors; @@ -137,17 +139,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Set url = new HashSet<>(); for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { @@ -163,6 +165,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return Arrays.asList(instance); } + protected String trimAndDecodeUrl(String url){ + try { + return URLDecoder.decode(url.trim(), "UTF-8"); + } catch (UnsupportedEncodingException e) { + return url; + } + } + @Override protected List> prepareSources(final Document doc, final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5228d1d02d..cc903b49eb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; @@ -759,6 +760,40 @@ class MappersTest { assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } + + @Test + void testXMLEncodedURL() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml"))); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + String decoded = "https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS"; + assertEquals(decoded, p.getInstance().get(0).getUrl().get(0)); + } + + @Test + void testXMLEncodedURL_ODF() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Dataset p = (Dataset) list.get(0); + assertTrue(p.getInstance().size() > 0); + for(String url : p.getInstance().get(0).getUrl()){ + System.out.println(url); + assertTrue(!url.contains("&")); + } + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml new file mode 100644 index 0000000000..c9cafea4f6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml @@ -0,0 +1,40 @@ + + +
+ r3c4b2081b22::0001f1a60b1acbb28dd66b4f6c7d881f + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + 2021-06-24T10:40:25.346Z + r3c4b2081b22 + 2021-06-24T10:40:55.153Z +
+ + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + Progress report on preparing Strategic plan for Technology transfer from EPPL/FMPI CU + OPEN + + Documents, reports + 0034 + Progress report on preparation process of the technology transfer plan for CU. + corda__h2020::692335 + + + + + file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fopendata%2Fcordis-h2020projectDeliverables.tsv + + + + + + + false + false + 0.9 + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml new file mode 100644 index 0000000000..b970605a69 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059&test=yes + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059&test=yes + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file