From a1cafaf2e36cc8c5de2354e723739b85aa1c95bd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 16 Nov 2021 15:16:28 +0100 Subject: [PATCH 01/44] added mvn site for dnet-hadoop project --- dhp-build/dhp-code-style/pom.xml | 5 +++++ dhp-build/pom.xml | 11 +++++++++++ dhp-build/src/site/site.xml | 22 ++++++++++++++++++++++ dhp-common/pom.xml | 7 +++++++ dhp-workflows/pom.xml | 7 +++++++ dhp-workflows/src/site/site.xml | 25 +++++++++++++++++++++++++ pom.xml | 5 +++++ src/site/site.xml | 21 +++++++++++++++++++++ 8 files changed, 103 insertions(+) create mode 100644 dhp-build/src/site/site.xml create mode 100644 dhp-workflows/src/site/site.xml create mode 100644 src/site/site.xml diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 77aa2aedb..7a6a32e0e 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -22,6 +22,10 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + file://${dhp.site.stage.path}/site/dhp-build/dhp-code-style + @@ -43,6 +47,7 @@ UTF-8 + /tmp/dhp-site \ No newline at end of file diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 12b999b9c..fed689a06 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -10,6 +10,9 @@ pom This module is a container for the build tools used in dnet-hadoop + + true + dhp-code-style @@ -17,4 +20,12 @@ dhp-build-properties-maven-plugin + + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-build + + + diff --git a/dhp-build/src/site/site.xml b/dhp-build/src/site/site.xml new file mode 100644 index 000000000..2d9d769a2 --- /dev/null +++ b/dhp-build/src/site/site.xml @@ -0,0 +1,22 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c057123b1..686b89f6b 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -13,6 +13,13 @@ dhp-common jar + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-common + + + This module contains common utilities meant to be used across the dnet-hadoop submodules diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 22ee77619..89ba2bf70 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -15,6 +15,13 @@ This module is the container for the oozie workflow definitions in dnet-hadoop project + + + DHPSite + file://${dhp.site.stage.path}/site/dhp-workflows + + + dhp-workflow-profiles dhp-aggregation diff --git a/dhp-workflows/src/site/site.xml b/dhp-workflows/src/site/site.xml new file mode 100644 index 000000000..6b742db6a --- /dev/null +++ b/dhp-workflows/src/site/site.xml @@ -0,0 +1,25 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 71c55d1f0..f7e3c6226 100644 --- a/pom.xml +++ b/pom.xml @@ -719,6 +719,10 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + file://${dhp.site.stage.path}/site/ + @@ -734,6 +738,7 @@ + /tmp/dhp-site UTF-8 UTF-8 3.6.0 diff --git a/src/site/site.xml b/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file From 2d67020c590cd4d779e978ca8688c632c0019a57 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 16 Nov 2021 16:01:08 +0100 Subject: [PATCH 02/44] added dhp-enrichment 
maven site template --- .../src/site/markdown/pubmed.md | 6 ++++- .../dhp-aggregation/src/site/site.xml | 1 - .../dhp-enrichment/src/site/markdown/about.md | 1 + .../dhp-enrichment/src/site/site.xml | 26 +++++++++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/site/markdown/about.md create mode 100644 dhp-workflows/dhp-enrichment/src/site/site.xml diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md index f6327a51b..00e3ed877 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -4,7 +4,7 @@ This section describes the mapping implemented for [MEDLINE/PubMed](https://pubm Collection --------- The native data is collected from [ftp baseline](https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/) containing XML with -the following [shcema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) +the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) Parsing @@ -50,6 +50,10 @@ The table below describes the mapping from the XML Native to the OAF mapping |//Author/FullName| author.Forename| Concatenation of forname + lastName if exist | |FOR ALL AUTHOR | author.rank| sequential number starting from 1| +#TODO + +Missing item mapped + diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml index da5da0f1e..c0a70264d 100644 --- a/dhp-workflows/dhp-aggregation/src/site/site.xml +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -20,7 +20,6 @@ - diff --git a/dhp-workflows/dhp-enrichment/src/site/markdown/about.md b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md new file mode 100644 index 000000000..c220c63b2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md @@ -0,0 +1 @@ +#DHP Enrichment \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/site/site.xml b/dhp-workflows/dhp-enrichment/src/site/site.xml new file mode 100644 index 000000000..dad0cd996 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/site.xml @@ -0,0 +1,26 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + + \ No newline at end of file From ec8b0219ff034ddeb3d8c878ecf40ec03337e658 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 17:41:34 +0100 Subject: [PATCH 03/44] [Documentation] Added first page for Integration via unresolved entities generation --- .../src/site/markdown/index.md | 17 +++++++-- .../src/site/markdown/integration.md | 36 +++++++++++++++++++ .../dhp-aggregation/src/site/site.xml | 3 ++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/integration.md diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md index c0c756082..240617f91 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md @@ -1,9 +1,20 @@ ##DHP-Aggregation -This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records. +This module defines a set of oozie workflows for -Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure +1. the **collection** and **transformation** of metadata records. +2. 
the **integration** of new external information in the results + + +### Collection and Transformation + +The workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping of each MDStore. -It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). \ No newline at end of file +It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). + +### Integration of external information in the results + +The workflows create new entities in the OpenAIRE format (OAF) whose aim is to enrich the results already contained in the graph. +See the integration section for more insight. diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md new file mode 100644 index 000000000..ef19cfd24 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -0,0 +1,36 @@ +DHP Aggregation - Integration method +===================================== + +The integration method can be applied every time new information, which is neither aggregated from the repositories +nor computed directly by OpenAIRE, should be added to the results of the graph. + +The information integrated so far is: + +1. Article impact measures + 1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores +2. Result Subjects + 1. Integration of Fields of Science and Technology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in + results subjects. + + +The method always consists of the creation of a new entity in the OpenAIRE format (OAF entity) containing only the id +and the element in the OAF model that should be used to map the information we want to integrate. + +The id is set by using a particular encoding of the given PID + +*unresolved:[pid]:[pidtype]* + +where + +1 *unresolved* is a constant value +2 *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 +3 *pidtype* is the persistent id type, e.g. doi + +Such entities are matched against those available in the graph using the result.instance.pid values. + +This mechanism can be used to integrate enrichments produced in association with a given PID. +If a match is found with one of the results already in the graph, that result will be enriched with the information +present in the new OAF. +All the objects for which a match is not found are discarded.
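As a rough illustration of the encoding described above, a minimal sketch follows; the object and method names are hypothetical and not part of the dnet-hadoop codebase, only the id layout comes from this document.

```scala
// Hypothetical sketch of the unresolved-entity id encoding described above.
// Names here are illustrative; only the unresolved:[pid]:[pidtype] layout
// comes from the documentation.
object UnresolvedIdExample {

  // Encode a PID and its type as an unresolved-entity identifier.
  def unresolvedId(pid: String, pidType: String): String =
    s"unresolved:$pid:$pidType"

  def main(args: Array[String]): Unit = {
    // e.g. a DOI carrying an externally produced enrichment
    println(unresolvedId("10.5281/zenodo.4707307", "doi"))
    // prints: unresolved:10.5281/zenodo.4707307:doi
  }
}
```

An entity carrying such an id would then be matched against the graph on the result.instance.pid values; matched results are enriched and the remaining objects are discarded, as described above.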
+ + diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml index c0a70264d..75fc5032e 100644 --- a/dhp-workflows/dhp-aggregation/src/site/site.xml +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -19,6 +19,9 @@ + + + From 4094f2bb9ab5ad87a31b6691b8b853542d6b7685 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Nov 2021 10:04:52 +0100 Subject: [PATCH 04/44] added integration md file --- .../dhp-aggregation/src/site/markdown/integration.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md index ef19cfd24..baf232e40 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -22,9 +22,9 @@ The id is set by using a particular encoding of the given PID where -1 *unresolved* is a constant value -2 *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 -3 *pidtype* is the persistent id type, e.g. doi +1. *unresolved* is a constant value +2. *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 +3. *pidtype* is the persistent id type, e.g. doi Such entities are matched against those available in the graph using the result.instance.pid values. From cded363b5595ae9dd3a9cf5cdbe300a86916b0a0 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 11:06:35 +0100 Subject: [PATCH 05/44] code refactor, created and moved scala code on the correct maven folder under src/main/scala and src/test/scala --- .../scholix/SparkCreateActionset.scala | 0 .../scholix/SparkSaveActionSet.scala | 2 +- .../dhp/collection/CollectionUtils.scala | 0 .../dhp/datacite/AbstractRestClient.scala | 0 .../dhp/datacite/DataciteAPIImporter.scala | 0 .../DataciteToOAFTransformation.scala | 0 .../GenerateDataciteDatasetSpark.scala | 3 +-- .../dnetlib/dhp/datacite/ImportDatacite.scala | 0 .../SparkDownloadUpdateDatacite.scala | 0 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 1 + .../bio/SparkTransformBioDatabaseToOAF.scala | 12 +++++------ .../ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 3 +-- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 5 ++--- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 0 .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 ++++++++----------- .../{actionmanager => }/datacite/record.json | 0 .../dhp/datacite/DataciteToOAFTest.scala | 0 .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 0 19 files changed, 21 insertions(+), 27 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala (96%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/collection/CollectionUtils.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/AbstractRestClient.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala (95%) rename 
dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/ImportDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (73%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{java => scala}/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala (96%) rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/record.json (100%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/{java => scala}/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala (100%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index 1df7ea3fb..62d219b57 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, Oaf, Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 100% rename from 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index a63627d1c..3c8caa485 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH -import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 100% rename from 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 70dcc0184..853b24862 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -7,6 +7,7 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} import collection.JavaConverters._ + object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 8ae8285e3..fcceacd44 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => 
CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17d21f19c..660a26a6c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index eab6b1dc6..18e39387f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,9 +1,8 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 8da617ca0..12af4824b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -38,7 +37,7 @@ object SparkEBILinksToOaf { ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) + .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index ecef32202..d5d40ecfe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -4,10 +4,9 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ -import scala.collection.JavaConverters._ import java.util.regex.Pattern - +import collection.JavaConverters._ /** * */ @@ -22,10 +21,10 @@ object PubMedToOaf { val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") - /** * Cleaning the DOI Applying regex in order to * remove doi starting with URL + * * @param doi input DOI * @return cleaned DOI */ @@ -49,7 +48,7 @@ object PubMedToOaf { * starting from OAF instanceType value * * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies + * @param vocabularies All dnet vocabularies * @return the correct instance */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { @@ -65,7 +64,7 @@ object PubMedToOaf { } /** - * Mapping the Pubmedjournal info into the OAF Journale + * Mapping the Pubmedjournal info into the OAF Journale * * @param j the pubmedJournal * @return the OAF Journal @@ -91,9 +90,8 @@ object PubMedToOaf { * Find vocabulary term into synonyms and term in the vocabulary * * @param vocabularyName the input vocabulary name - * @param vocabularies all the vocabularies - * @param term the term to find - * + * @param vocabularies all the vocabularies + * 
@param term the term to find * @return the cleaned term value */ def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { @@ -104,10 +102,9 @@ object PubMedToOaf { /** - * Map the Pubmed Article into the OAF instance + * Map the Pubmed Article into the OAF instance * - * - * @param article the pubmed articles + * @param article the pubmed articles * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ @@ -185,7 +182,6 @@ object PubMedToOaf { //-------------------------------------------------------------------------------------- - // RESULT MAPPING //-------------------------------------------------------------------------------------- result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala From 2fd9ceac13d38ee4c2df06ab834de61ff8309972 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 11:35:22 +0100 Subject: [PATCH 06/44] code refactor, created and moved scala code on the correct maven folder under src/main/scala and src/test/scala --- .../doiboost/DoiBoostMappingUtil.scala | 0 .../SparkGenerateDOIBoostActionSet.scala | 0 .../doiboost/SparkGenerateDoiBoost.scala | 0 .../doiboost/crossref/Crossref2Oaf.scala | 9 +++--- .../doiboost/crossref/CrossrefDataset.scala | 25 ++++++++-------- .../doiboost/crossref/CrossrefImporter.java | 0 .../dnetlib/doiboost/crossref/ESClient.java | 0 .../crossref/ExtractCrossrefRecords.java | 0 .../crossref/GenerateCrossrefDataset.scala | 20 +++++-------- .../crossref/SparkMapDumpIntoOAF.scala | 4 +-- .../crossref/UnpackCrtossrefEntries.scala | 9 ++---- .../dnetlib/doiboost/mag/MagDataModel.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 7 ++--- .../doiboost/mag/SparkProcessMAG.scala | 20 +++++-------- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 ++--- .../orcid/SparkConvertORCIDToOAF.scala | 8 ++--- .../doiboost/orcid/SparkPreprocessORCID.scala | 29 +++++++++---------- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 8 ++--- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 3 +- .../doiboost/DoiBoostHostedByMapTest.scala | 2 +- .../dnetlib}/doiboost/NormalizeDoiTest.scala | 2 +- .../crossref/CrossrefMappingTest.scala | 0 
.../dnetlib/doiboost/mag/MAGMappingTest.scala | 2 +- .../orcid/MappingORCIDToOAFTest.scala | 3 +- .../doiboost/uw/UnpayWallMappingTest.scala | 4 +-- 25 files changed, 69 insertions(+), 95 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala (99%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala (77%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefImporter.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/ESClient.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala (73%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala (96%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala (88%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/MagDataModel.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala (91%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala (84%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala (67%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala (80%) rename dhp-workflows/dhp-doiboost/src/main/{java => scala}/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib/dhp => scala/eu/dnetlib}/doiboost/DoiBoostHostedByMapTest.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{java/eu/dnetlib/dhp => scala/eu/dnetlib}/doiboost/NormalizeDoiTest.scala (96%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/mag/MAGMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala (99%) rename dhp-workflows/dhp-doiboost/src/test/{java => scala}/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala (100%) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 1b1c850ba..edca4a180 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,20 +4,19 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JValue, _} +import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ import org.slf4j.{Logger, LoggerFactory} +import java.util import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex -import java.util - -import eu.dnetlib.doiboost.DoiBoostMappingUtil case class CrossrefDT(doi: String, json:String, timestamp: Long) {} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala similarity index 77% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 159b817c7..6a1c701af 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -17,12 +17,12 @@ object CrossrefDataset { val logger: Logger = 
LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def to_item(input:String):CrossrefDT = { + def to_item(input: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val ts: Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) CrossrefDT(doi, input, ts) } @@ -30,7 +30,6 @@ object CrossrefDataset { def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) parser.parseArgument(args) @@ -54,7 +53,7 @@ object CrossrefDataset { return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -66,7 +65,7 @@ object CrossrefDataset { if (a == null) return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -79,20 +78,20 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") - val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) - .map(i =>to_item(i))) + spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i => CrossrefImporter.decompressBlob(i._2.toString)) + .map(i => to_item(i))) main_ds.union(update).groupByKey(_.doi) .agg(crossrefAggregator.toColumn) - .map(s=>s._2) + .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala similarity index 73% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 526ff7b3a..6d03abc25 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -2,17 +2,12 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item -import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass -import org.apache.hadoop.io.{IntWritable, Text} -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.JArray -import org.json4s.jackson.JsonMethods.{compact, parse, render} +import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -24,11 +19,10 @@ object GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] - def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) @@ -51,14 +45,14 @@ object GenerateCrossrefDataset { import spark.implicits._ - val tmp : RDD[String] = sc.textFile(sourcePath,6000) + val tmp: RDD[String] = sc.textFile(sourcePath, 6000) spark.createDataset(tmp) .map(entry => crossrefElement(entry)) .write.mode(SaveMode.Overwrite).save(targetPath) -// .map(meta => crossrefElement(meta)) -// .toDS.as[CrossrefDT] -// .write.mode(SaveMode.Overwrite).save(targetPath) + // .map(meta => crossrefElement(meta)) + // .toDS.as[CrossrefDT] + // .write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index c65916610..fa55b9fb9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -4,10 +4,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset} import 
org.apache.commons.io.IOUtils - import org.apache.spark.SparkConf - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala similarity index 88% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 95ecb568b..191c4587e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST.JArray @@ -17,9 +17,7 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - - - def extractDump(input:String):List[String] = { + def extractDump(input: String): List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -30,7 +28,6 @@ object UnpackCrtossrefEntries { } - def main(args: Array[String]): Unit = { val conf = new SparkConf val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) @@ -45,7 +42,7 @@ object UnpackCrtossrefEntries { .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) + sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index fd9629024..0a6fa00f0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import scala.collection.JavaConverters._ import scala.collection.mutable diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index a68d0bb2d..d25a4893f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -3,8 +3,8 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{SaveMode, SparkSession} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkImportMagIntoDataset { @@ -24,13 +24,13 @@ object SparkImportMagIntoDataset { "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", 
"AttributeValue:string")), "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), + "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), @@ -75,7 +75,6 @@ object SparkImportMagIntoDataset { .master(parser.get("master")).getOrCreate() - stream.foreach { case (k, v) => val s: StructType = getSchema(k) val df = spark.read diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala similarity index 91% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 016279787..932725446 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -5,19 +5,16 @@ import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.{col, collect_list, struct} import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - object SparkProcessMAG { - def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ + def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { d.where(col("Doi").isNotNull) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) + .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) .map(_._2)(Encoders.product[MagPapers]) .map(mp => { new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), @@ -98,13 +95,13 @@ object SparkProcessMAG { var magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val conference = 
spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" ) + .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate") val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] + .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") @@ -122,7 +119,7 @@ object SparkProcessMAG { magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") @@ -148,11 +145,10 @@ object SparkProcessMAG { spark.read.load(s"$workingPath/mag_publication").as[Publication] .filter(p => p.getId == null) .groupByKey(p => p.getId) - .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) + .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 1cd3f7028..11031f9ca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -4,17 +4,16 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} -import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import org.apache.commons.lang.StringUtils -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala similarity index 84% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index fa4a93e00..1b189e296 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -11,10 +11,10 @@ object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -35,8 +35,8 @@ object SparkConvertORCIDToOAF { val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark,workingPath, targetPath) + run(spark, workingPath, targetPath) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala similarity index 67% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index 31f331912..153be5dd1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -1,48 +1,45 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.functions.{col, collect_list} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkPreprocessORCID { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + def fixORCIDItem(item: ORCIDItem): ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) } - def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s)) spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - val res = 
spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null) spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ + .map(i => { val doi = i._1.doi val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) + (doi, author) + }).groupBy(col("_1").alias("doi")) .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] .map(s => fixORCIDItem(s)) .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") @@ -67,4 +64,4 @@ object SparkPreprocessORCID { } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala similarity index 80% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 4530926f1..70290018d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -1,16 +1,14 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.application.ArgumentApplicationParser - import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { @@ -32,11 +30,11 @@ object SparkMapUnpayWallToOAF { val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath") + val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication] + val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index c8324cde1..bf5694965 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -4,14 +4,13 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ -import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala similarity index 98% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala index 4912648be..049ac37f4 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.doiboost +package eu.dnetlib.doiboost import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset} import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala similarity index 96% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala index a9a841ee9..bdf845f19 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.doiboost +package eu.dnetlib.doiboost import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 46d4ec08d..7403e103e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -3,9 +3,9 @@ package 
eu.dnetlib.doiboost.mag import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, SparkSession} import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala similarity index 99% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index b484dc087..a5ce6296c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -10,9 +10,8 @@ import org.junit.jupiter.api.io.TempDir import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path -import scala.io.Source - import scala.collection.JavaConversions._ +import scala.io.Source class MappingORCIDToOAFTest { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala index fa696fffc..012ed3da0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala @@ -3,11 +3,11 @@ package eu.dnetlib.doiboost.uw import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute +import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} import scala.io.Source -import org.junit.jupiter.api.Assertions._ -import org.slf4j.{Logger, LoggerFactory} class UnpayWallMappingTest { From 1f5ee116ed7d897831fcb74818f6c5a5cc9ee76b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 12:23:52 +0100 Subject: [PATCH 07/44] code refactor: created and moved Scala code to the correct Maven folders under src/main/scala and src/test/scala; fixed tests --- .../sx/graphimport/SparkDataciteToOAF.scala | 31 ---- .../oa/graph/hostedbymap/Aggregators.scala | 2 +- .../SparkApplyHostedByMapToDatasource.scala | 13 +- .../SparkApplyHostedByMapToResult.scala | 18 +-- .../SparkPrepareHostedByInfoToApply.scala | 35 ++--- .../hostedbymap/SparkProduceHostedByMap.scala | 109 +++++++------ .../raw/CopyHdfsOafSparkApplication.scala | 10 +- .../resolution/SparkResolveEntities.scala | 25 ++- .../resolution/SparkResolveRelation.scala | 2 +- .../graph/SparkConvertDatasetToJsonRDD.scala | 10 +- .../sx/graph/SparkConvertObjectToJson.scala | 10 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 27 ++-- .../dhp/sx/graph/SparkCreateInputGraph.scala | 27 ++-- .../dhp/sx/graph/SparkCreateScholix.scala | 28 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 12 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 1 +
.../SparkGeneratePanagaeaDataset.scala | 18 +-- .../dhp/sx/graph/scholix/ScholixUtils.scala | 143 +++++++++--------- .../dnetlib/dhp/oa/graph/resolution/dataset | 6 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 0 .../dhp/oa/graph/hostedbymap/TestApply.scala | 0 .../oa/graph/hostedbymap/TestPrepare.scala | 4 - .../oa/graph/hostedbymap/TestPreprocess.scala | 5 +- .../resolution/ResolveEntitiesTest.scala | 1 + .../sx/graph/scholix/ScholixGraphTest.scala | 0 .../dhp/sx/pangaea/PangaeaTransformTest.scala | 1 - 26 files changed, 235 insertions(+), 303 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala (81%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala (85%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala (74%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala (88%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala (79%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala (69%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala (62%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala (68%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala (98%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala (99%) rename 
dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{java => scala}/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala (95%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala deleted file mode 100644 index 9e905d806..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.dhp.oa.sx.graphimport - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.SparkSession - -object SparkDataciteToOAF { - - - def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - import spark.implicits._ - - - val sc = spark.sparkContext - - val inputPath = parser.get("inputPath") - - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index ce383292c..ad4e1c96e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala similarity index 81% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 1b18ba3ae..38af3eee4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -2,13 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import 
eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} @@ -52,18 +51,18 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala similarity index 85% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 0e047d016..d360da2e9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -5,16 +5,13 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - - object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { @@ -39,6 +36,7 @@ object SparkApplyHostedByMapToResult { p })(Encoders.bean(classOf[Publication])) } + def main(args: Array[String]): Unit = { @@ -67,18 +65,18 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : 
Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) spark.read.textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala similarity index 74% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index b7a7d352f..87e203e4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,61 +3,58 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo - import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} - - object SparkPrepareHostedByInfoToApply { implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { - var lst:List[EntityInfo] = List() + def getList(id: String, j: Journal, name: String): List[EntityInfo] = { + var lst: List[EntityInfo] = List() - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } - if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst } - if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst } lst } - def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) val mapper = new ObjectMapper() - val dd : 
Dataset[Publication] = spark.read.textFile(publicationPath) + val dd: Dataset[Publication] = spark.read.textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) - dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) } - def toEntityInfo(input:String): EntityInfo = { + def toEntityInfo(input: String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] toEntityItem(c.keys.head, c.values.head) } - def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) @@ -67,7 +64,7 @@ object SparkPrepareHostedByInfoToApply { Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 - if(t2._2 != null ){ + if (t2._2 != null) { val ds = t2._2 res.setHostedById(ds.getId) res.setOpenAccess(ds.getOpenAccess) @@ -107,10 +104,10 @@ object SparkPrepareHostedByInfoToApply { //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. 
Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 1ee1d5d1a..6dfe35623 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,41 +1,39 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import com.fasterxml.jackson.databind.ObjectMapper -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.hadoop.fs.Path + import java.io.PrintWriter -import org.apache.hadoop.io.compress.GzipCodec - - object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 val gold: HostedByInfo = input._2 val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => null + case whoa => null } } @@ -44,7 +42,7 @@ object SparkProduceHostedByMap { implicit val formats = org.json4s.DefaultFormats - val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + val map: Map[String, HostedByItemType] = 
Map(input._1 -> input._2) Serialization.write(map) @@ -52,34 +50,33 @@ object SparkProduceHostedByMap { } - - def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { - if(issn != null){ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, issn, eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, eissn, "" , oa) + def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = { + if (issn != null) { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, issn, eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, issn, eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, issn, "", issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, issn, "", issnl, oa) + } else { + HostedByItemType(id, officialname, issn, "", "", oa) } } - }else{ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, "", eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, "", eissn, "" , oa) + } else { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, "", eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, "", eissn, "", oa) } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, "", "", issnl , oa) - }else{ - HostedByItemType("", "", "", "", "" , oa) + } else { + if (issnl != null) { + HostedByItemType(id, officialname, "", "", issnl, oa) + } else { + HostedByItemType("", "", "", "", "", oa) } } } @@ -90,10 +87,10 @@ object SparkProduceHostedByMap { return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) } - HostedByItemType("","","","","",false) + HostedByItemType("", "", "", "", "", false) } - def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ @@ -102,10 +99,10 @@ object SparkProduceHostedByMap { implicit var encoderD = Encoders.kryo[Datasource] - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[Datasource])) - dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -115,17 +112,17 @@ object SparkProduceHostedByMap { } - def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] val mapper = new ObjectMapper() - val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) - dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } @@ -134,41 +131,40 @@ object SparkProduceHostedByMap 
{ return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) } - def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) - dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) } def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst : List[(String, HostedByItemType)] = List() - if(!input.issn.equals("")){ + var lst: List[(String, HostedByItemType)] = List() + if (!input.issn.equals("")) { lst = (input.issn, input) :: lst } - if(!input.eissn.equals("")){ + if (!input.eissn.equals("")) { lst = (input.eissn, input) :: lst } - if(!input.lissn.equals("")){ + if (!input.lissn.equals("")) { lst = (input.lissn, input) :: lst } lst } - - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { val conf = new Configuration() conf.set("fs.defaultFS", hdfsNameNode) - val fs= FileSystem.get(conf) + val fs = FileSystem.get(conf) val output = fs.create(new Path(outputPath)) val writer = new PrintWriter(output) try { @@ -182,7 +178,6 @@ object SparkProduceHostedByMap { } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -213,7 +208,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index c7ad1890d..0179cc266 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -4,20 +4,14 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.io.IOUtils -import org.apache.commons.lang3.StringUtils -import org.apache.http.client.methods.HttpGet -import org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import 
org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory -import scala.collection.JavaConverters._ import scala.io.Source - +import scala.collection.JavaConverters._ object CopyHdfsOafSparkApplication { def main(args: Array[String]): Unit = { @@ -59,7 +53,7 @@ object CopyHdfsOafSparkApplication { if (validPaths.nonEmpty) { val oaf = spark.read.load(validPaths: _*).as[Oaf] val mapper = new ObjectMapper() - val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList l.foreach( e => oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala similarity index 79% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 316b8afed..6b4a501d6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils @@ -14,7 +13,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -51,10 +50,10 @@ object SparkResolveEntities { fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) } -} + } -def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -71,22 +70,22 @@ def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: St } - def deserializeObject(input:String, entity:EntityType ) :Result = { + def deserializeObject(input: String, entity: EntityType): Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software=> mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct 
=> mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re: Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] entities.foreach { e => diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index cd517dd5e..c7f9b2d0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.schema.oaf.Relation import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 69% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 3ee0c7dd6..9d16cf907 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.Result import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -29,13 +29,13 @@ object SparkConvertDatasetToJsonRDD { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject = List("publication","dataset","software", "otherResearchProduct") + val resultObject = List("publication", "dataset", "software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach{item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r=> 
mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach { item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index 846ac37af..cc1b97fd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { @@ -32,8 +32,8 @@ object SparkConvertObjectToJson { log.info(s"objectType -> $objectType") - implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] - implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +42,11 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 4b82fe645..2eb5e3a35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,11 +2,12 @@ package eu.dnetlib.dhp.sx.graph import 
com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -31,39 +32,39 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") - val rddRelation 
=spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index 350b00c5e..b6f678967 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,14 +1,12 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - - object SparkCreateInputGraph { def main(args: Array[String]): Unit = { @@ -33,7 +31,7 @@ object SparkCreateInputGraph { ) - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) @@ -41,16 +39,13 @@ object SparkCreateInputGraph { implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - - - val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] log.info("Extract Publication") @@ -70,27 +65,27 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) } } - def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { + def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) } - def 
makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { + def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { import spark.implicits._ - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - val ds:Dataset[T] = spark.read.load(sourcePath).as[T] + val ds: Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups{(x,y) => + ds.groupByKey(_.getId).reduceGroups { (x, y) => x.mergeFrom(y) x }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index e4fcd2782..9930c57af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.count -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateScholix { @@ -42,7 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] @@ -51,54 +51,54 @@ object SparkCreateScholix { relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1!= null && input._2!= null) { + if (input._1 != null && input._2 != null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) } - else null + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r!= null) + .filter(r => r != null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2== null) { + if (input._2 == null) { null } else { val s: Scholix = input._1._2 val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + }.filter(s => s != 
null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - val scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] + val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) + .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) + .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) + val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i => + relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => val re = i._1 val sum = i._2._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 0970375f5..4274cae5a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -28,15 +28,15 @@ object SparkCreateSummaryObject { val targetPath = 
parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) + val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index 193512474..c70397d04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -5,6 +5,7 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse + import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index 79c75d6df..f1a4553ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,13 +2,12 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ import scala.io.Source - +import scala.collection.JavaConverters._ object SparkGeneratePanagaeaDataset { @@ -28,17 +27,17 @@ object SparkGeneratePanagaeaDataset { parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc:SparkContext = spark.sparkContext + val sc: 
SparkContext = spark.sparkContext - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") @@ -46,7 +45,4 @@ object SparkGeneratePanagaeaDataset { } - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 93c554e04..7b1ddbb8f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.sx.graph.scholix - import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} @@ -11,22 +10,23 @@ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import scala.collection.JavaConverters._ import scala.io.Source -import scala.language.postfixOps +import scala.collection.JavaConverters._ + object ScholixUtils { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - val DATE_RELATION_KEY:String = "RelationDate" - case class RelationVocabulary(original:String, inverse:String){} + val DATE_RELATION_KEY: String = "RelationDate" - case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} + case class RelationVocabulary(original: String, inverse: String) {} - val relations:Map[String, RelationVocabulary] = { - val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} + + val relations: Map[String, RelationVocabulary] = { + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -35,12 +35,12 @@ object ScholixUtils { } - def extractRelationDate(relation: Relation):String = { + def extractRelationDate(relation: Relation): String = { - if (relation.getProperties== null || !relation.getProperties.isEmpty) + if (relation.getProperties == null || !relation.getProperties.isEmpty) null else { - val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + 
val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) if (date.isDefined) date.get else @@ -48,9 +48,9 @@ object ScholixUtils { } } - def extractRelationDate(summary: ScholixSummary):String = { + def extractRelationDate(summary: ScholixSummary): String = { - if(summary.getDate== null || summary.getDate.isEmpty) + if (summary.getDate == null || summary.getDate.isEmpty) null else { summary.getDate.get(0) @@ -59,15 +59,14 @@ object ScholixUtils { } - def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { + def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) } - - val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { @@ -78,17 +77,16 @@ object ScholixUtils { if (b == null) RelatedEntities(a._1, relatedDataset, relatedPublication) else - RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) + RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) } override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1!= null && b2!= null) - RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) + if (b1 != null && b2 != null) + RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) + else if (b1 != null) + b1 else - if (b1!= null) - b1 - else b2 } @@ -104,12 +102,12 @@ object ScholixUtils { override def zero: Scholix = null - def scholix_complete(s:Scholix):Boolean ={ - if (s== null || s.getIdentifier==null) { + def scholix_complete(s: Scholix): Boolean = { + if (s == null || s.getIdentifier == null) { false } else if (s.getSource == null || s.getTarget == null) { - false - } + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) false else @@ -121,7 +119,7 @@ object ScholixUtils { } override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 + if (scholix_complete(b1)) b1 else b2 } override def finish(reduction: Scholix): Scholix = reduction @@ -132,7 +130,7 @@ object ScholixUtils { } - def createInverseScholixRelation(scholix: Scholix):Scholix = { + def createInverseScholixRelation(scholix: Scholix): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -144,34 +142,33 @@ object ScholixUtils { s - } - def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ + def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", 
null)).asJava) }(collection.breakOut) - l + l } else List() } - def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { + def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) + new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) }(collection breakOut) l } else List() } - def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { + def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -192,29 +189,28 @@ object ScholixUtils { r.setObjectType(summaryObject.getTypology.toString) r.setObjectSubType(summaryObject.getSubType) - if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) - r.setTitle(summaryObject.getTitle.get(0)) + if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) - if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ - val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList + if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { + val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } - if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) + if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) - { - val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { + val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { + if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { - val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( + val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) , "collected", "complete" @@ -228,12 +224,9 @@ object ScholixUtils { } + def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { - - - def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { - - if (relation== null || source== null) + if (relation == null || source == null) return null val s = new Scholix @@ -253,9 +246,9 @@ object ScholixUtils { s.setPublicationDate(d) - if (source.getPublisher!= null && !source.getPublisher.isEmpty) { + if (source.getPublisher != null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map{ + .map { p => new ScholixEntityId(p, null) }(collection.breakOut) @@ -265,7 +258,7 
@@ object ScholixUtils { } val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation== null) + if (semanticRelation == null) return null s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) s.setSource(generateScholixResourceFromSummary(source)) @@ -274,8 +267,8 @@ object ScholixUtils { } - def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { - pidValue.map{ + def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { + pidValue.map { p => val pv = p.getValue @@ -285,67 +278,67 @@ object ScholixUtils { } - def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { + def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) - .filter(i => i.getPid!= null && i.getUrl != null) + r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) + .filter(i => i.getPid != null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } - def resultToSummary(r:Result):ScholixSummary = { + def resultToSummary(r: Result): ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) if (r.getPid == null || r.getPid.isEmpty) return null - val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication] ) + if (r.isInstanceOf[Publication]) s.setTypology(Typology.publication) else s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { - val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { + val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) if (titles.nonEmpty) s.setTitle(titles.asJava) else - return null + return null } - if(r.getAuthor!= null && !r.getAuthor.isEmpty) { - val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) + if (r.getAuthor != null && !r.getAuthor.isEmpty) { + val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) if (authors nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } - if (r.getDescription!= null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) + if (r.getDescription != null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) if (d.isDefined) s.setDescription(d.get.getValue) } - if (r.getSubject!= null 
&& !r.getSubject.isEmpty) { - val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (r.getSubject != null && !r.getSubject.isEmpty) { + val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) if (subjects.nonEmpty) s.setSubject(subjects.asJava) } - if (r.getPublisher!= null) + if (r.getPublisher != null) s.setPublisher(List(r.getPublisher.getValue).asJava) - if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { - val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset index 05c875148..2c73183e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -1,3 +1,3 @@ -{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} -{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} -{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number 
PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number 
PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index a3a753a8a..7abce547f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,13 +3,9 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} -import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.json4s import org.json4s.DefaultFormats -import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala 
b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 98% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 5b00e9b6f..0922f2e19 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,10 +4,9 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.Assertions._ import org.json4s.jackson.Serialization.write +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test class TestPreprocess extends java.io.Serializable{ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 9a142d3c0..f1bd841d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -159,6 +159,7 @@ class ResolveEntitiesTest extends Serializable { val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala index b90827e81..0d89cca85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.sx.pangaea import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils import 
org.junit.jupiter.api.Test -import java.util.TimeZone import java.text.SimpleDateFormat import java.util.Date import scala.io.Source From 9c82d670b87219fa28d21b582b8147af957de5ea Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Nov 2021 12:31:02 +0100 Subject: [PATCH 08/44] make class public in order to create javadoc --- .../eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../oa/matchers/simple/EnrichMissingPublicationDateTest.java | 2 +- .../eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java | 2 +- .../eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java (100%) diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 45bfc785f..52e9917bb 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -class UpdateMatcherTest { +public class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 550ded9f4..5af81a31a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -class EnrichMissingPublicationDateTest { +public class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index b532aa9f7..d93390e4a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -class SubscriptionUtilsTest { +public class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index a8bc03e31..117bdeef4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ 
b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -class TrustUtilsTest { +public class TrustUtilsTest { private static final double THRESHOLD = 0.95; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java From 793b5a8e5f7197c7b904b9348a80aecb70f47d4d Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Thu, 18 Nov 2021 14:49:38 +0100 Subject: [PATCH 09/44] Update 'dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java' Removing the dump of Measure at the level of the result. We decided not to map it --- .../dhp/oa/graph/dump/ResultMapper.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 49468540d..153d91314 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,15 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); - Optional> mes = Optional.ofNullable(input.getMeasures()); - if (mes.isPresent()) { - List measure = new ArrayList<>(); - mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - out.setMeasures(measure); - } + // Optional> mes = Optional.ofNullable(input.getMeasures()); + // if (mes.isPresent()) { + // List measure = new ArrayList<>(); + // mes + // .get() + // .forEach( + // m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); + // out.setMeasures(measure); + // } Optional .ofNullable(input.getAuthor()) From c0750fb17c1950193646c3501ab8303c1634a454 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 18 Nov 2021 17:10:46 +0100 Subject: [PATCH 10/44] cleanup --- .../java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 153d91314..1beea5cb4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,6 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); - // Optional> mes = Optional.ofNullable(input.getMeasures()); - // if (mes.isPresent()) { - // List measure = new ArrayList<>(); - // mes - // .get() - // .forEach( - // m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - // 
out.setMeasures(measure); - // } Optional .ofNullable(input.getAuthor()) From c0750fb17c1950193646c3501ab8303c1634a454 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 18 Nov 2021 17:11:31 +0100 Subject: [PATCH 11/44] avoid unnecessary count operations over large spark datasets --- .../eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index bf0b7f687..9cc003bf6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -63,7 +63,9 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { .toJavaRDD() .filter(x -> !isOpenorgs(x)); - log.info("Number of non-Openorgs relations collected: {}", simRels.count()); + if (log.isDebugEnabled()) { + log.debug("Number of non-Openorgs relations collected: {}", simRels.count()); + } spark .createDataset(simRels.rdd(), Encoders.bean(Relation.class)) From a24b9f8268a3e36a5032d40a103793677ab1dd8b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 18 Nov 2021 17:12:02 +0100 Subject: [PATCH 12/44] [dedup] trivial refactoring --- .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 24 +++++++++++++++++++ .../dhp/oa/dedup/SparkCreateSimRels.java | 23 ++---------------- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 23 ++---------------- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index d79d24653..aeb485768 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -14,6 +14,9 @@ import org.xml.sax.SAXException; import com.google.common.collect.Sets; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -152,4 +155,25 @@ public class DedupUtility { return o1.compareTo(o2); } + public static Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass(ModelConstants.IS_SIMILAR_TO); + r.setDataInfo(new DataInfo()); + + switch (entity) { + case "result": + r.setRelType(ModelConstants.RESULT_RESULT); + break; + case "organization": + r.setRelType(ModelConstants.ORG_ORG_RELTYPE); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } + } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index f89f634b5..3aa8f241d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -20,6 +20,7 @@ import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -102,7 +103,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { .createDataset( Deduper .computeRelations(sc, blocks, dedupConf) - .map(t -> createSimRel(t._1(), t._2(), entity)) + .map(t -> DedupUtility.createSimRel(t._1(), t._2(), entity)) .repartition(numPartitions) .rdd(), Encoders.bean(Relation.class)); @@ -111,24 +112,4 @@ public class SparkCreateSimRels extends AbstractSparkAction { } } - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); - - switch (entity) { - case "result": - r.setRelType("resultResult"); - break; - case "organization": - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 7d91e47cc..1cfac9a27 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -124,31 +124,12 @@ public class SparkWhitelistSimRels extends AbstractSparkAction { Dataset whiteListSimRels = whiteListRels2 .map( - (MapFunction, Relation>) r -> createSimRel(r._1(), r._2(), entity), + (MapFunction, Relation>) r -> DedupUtility + .createSimRel(r._1(), r._2(), entity), Encoders.bean(Relation.class)); saveParquet(whiteListSimRels, outputPath, SaveMode.Append); } } - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); - - switch (entity) { - case "result": - r.setRelType("resultResult"); - break; - case "organization": - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } } From bd9a43cefdd54b51a4cc542a3b8cd87696d59003 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 09:20:43 +0100 Subject: [PATCH 13/44] Revert to 4094f2bb9ab5ad87a31b6691b8b853542d6b7685 --- .../oaf/utils/GraphCleaningFunctions.java | 4 +- .../scholix/SparkCreateActionset.scala | 0 .../scholix/SparkSaveActionSet.scala | 2 +- .../dhp/collection/CollectionUtils.scala | 0 .../dhp/datacite/AbstractRestClient.scala | 0 .../dhp/datacite/DataciteAPIImporter.scala | 0 .../DataciteToOAFTransformation.scala | 0 .../GenerateDataciteDatasetSpark.scala | 3 +- .../dnetlib/dhp/datacite/ImportDatacite.scala | 0 .../SparkDownloadUpdateDatacite.scala | 0 .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 1 - .../bio/SparkTransformBioDatabaseToOAF.scala | 12 +- 
.../ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 3 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 5 +- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 0 .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 ++- .../dhp/datacite/DataciteToOAFTest.scala | 0 .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 0 .../{ => actionmanager}/datacite/record.json | 0 .../broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../EnrichMissingPublicationDateTest.java | 2 +- .../broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 2 +- .../doiboost/DoiBoostMappingUtil.scala | 0 .../SparkGenerateDOIBoostActionSet.scala | 0 .../doiboost/SparkGenerateDoiBoost.scala | 0 .../doiboost/crossref/Crossref2Oaf.scala | 9 +- .../doiboost/crossref/CrossrefDataset.scala | 25 +-- .../doiboost/crossref/CrossrefImporter.java | 0 .../dnetlib/doiboost/crossref/ESClient.java | 0 .../crossref/ExtractCrossrefRecords.java | 0 .../crossref/GenerateCrossrefDataset.scala | 20 ++- .../crossref/SparkMapDumpIntoOAF.scala | 4 +- .../crossref/UnpackCrtossrefEntries.scala | 9 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 7 +- .../doiboost/mag/SparkProcessMAG.scala | 20 ++- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 +- .../orcid/SparkConvertORCIDToOAF.scala | 8 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 29 ++-- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 8 +- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 3 +- .../doiboost/DoiBoostHostedByMapTest.scala | 2 +- .../dhp}/doiboost/NormalizeDoiTest.scala | 2 +- .../crossref/CrossrefMappingTest.scala | 0 .../dnetlib/doiboost/mag/MAGMappingTest.scala | 2 +- .../orcid/MappingORCIDToOAFTest.scala | 3 +- .../doiboost/uw/UnpayWallMappingTest.scala | 4 +- .../oa/graph/hostedbymap/Aggregators.scala | 2 +- .../SparkApplyHostedByMapToDatasource.scala | 13 +- .../SparkApplyHostedByMapToResult.scala | 18 ++- .../SparkPrepareHostedByInfoToApply.scala | 35 +++-- .../hostedbymap/SparkProduceHostedByMap.scala | 109 ++++++------- .../raw/CopyHdfsOafSparkApplication.scala | 10 +- .../resolution/SparkResolveEntities.scala | 25 +-- .../resolution/SparkResolveRelation.scala | 2 +- .../sx/graphimport/SparkDataciteToOAF.scala | 31 ++++ .../graph/SparkConvertDatasetToJsonRDD.scala | 10 +- .../sx/graph/SparkConvertObjectToJson.scala | 10 +- .../sx/graph/SparkConvertRDDtoDataset.scala | 27 ++-- .../dhp/sx/graph/SparkCreateInputGraph.scala | 27 ++-- .../dhp/sx/graph/SparkCreateScholix.scala | 28 ++-- .../sx/graph/SparkCreateSummaryObject.scala | 12 +- .../dhp/sx/graph/pangaea/PangaeaUtils.scala | 1 - .../SparkGeneratePanagaeaDataset.scala | 18 ++- .../dhp/sx/graph/scholix/ScholixUtils.scala | 143 +++++++++--------- .../dhp/oa/graph/hostedbymap/TestApply.scala | 0 .../oa/graph/hostedbymap/TestPrepare.scala | 4 + .../oa/graph/hostedbymap/TestPreprocess.scala | 5 +- .../resolution/ResolveEntitiesTest.scala | 1 - .../sx/graph/scholix/ScholixGraphTest.scala | 0 .../dhp/sx/pangaea/PangaeaTransformTest.scala | 1 + .../dnetlib/dhp/oa/graph/resolution/dataset | 6 +- 74 files changed, 431 insertions(+), 331 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala (96%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/collection/CollectionUtils.scala (100%) 
rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/AbstractRestClient.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala (95%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/ImportDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (73%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala (100%) rename dhp-workflows/dhp-aggregation/src/main/{scala => java}/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala (96%) rename dhp-workflows/dhp-aggregation/src/test/{scala => java}/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/{scala => java}/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala (100%) rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/{ => actionmanager}/datacite/record.json (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala (99%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala (77%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefImporter.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/ESClient.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala (73%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala (96%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala (88%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/MagDataModel.scala (100%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala (91%) rename 
dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala (84%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala (67%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala (80%) rename dhp-workflows/dhp-doiboost/src/main/{scala => java}/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{scala/eu/dnetlib => java/eu/dnetlib/dhp}/doiboost/DoiBoostHostedByMapTest.scala (98%) rename dhp-workflows/dhp-doiboost/src/test/{scala/eu/dnetlib => java/eu/dnetlib/dhp}/doiboost/NormalizeDoiTest.scala (96%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/mag/MAGMappingTest.scala (100%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala (99%) rename dhp-workflows/dhp-doiboost/src/test/{scala => java}/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala (81%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala (85%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala (74%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala (88%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala (79%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala (99%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala (69%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala (62%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala (76%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala (68%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => java}/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala (83%) rename dhp-workflows/dhp-graph-mapper/src/main/{scala => 
java}/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala (61%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala (96%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala (98%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala (99%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala (100%) rename dhp-workflows/dhp-graph-mapper/src/test/{scala => java}/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala (95%) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 43413b311..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -27,8 +27,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static final int ORCID_LEN = 19; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; - public static final String TITLE_FILTER_REGEX = "(test)|\\W|\\d"; - public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; + public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]"; + public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10; public static T fixVocabularyNames(T value) { if (value instanceof Datasource) { diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index 62d219b57..1df7ea3fb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, Oaf, Publication, Software, OtherResearchProduct, Relation} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import 
org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index 3c8caa485..a63627d1c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} +import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH +import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index 853b24862..70dcc0184 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -7,7 +7,6 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} import collection.JavaConverters._ - object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index fcceacd44..8ae8285e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,9 +1,9 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import BioDBToOAF.ScholixResolved +import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != 
null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 660a26a6c..17d21f19c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed._ +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index 18e39387f..eab6b1dc6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,8 +1,9 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 93% rename from 
dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
index 12af4824b..8da617ca0 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala
@@ -1,10 +1,11 @@
 package eu.dnetlib.dhp.sx.bio.ebi
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.collection.CollectionUtils
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem
+import BioDBToOAF.EBILinkItem
+import eu.dnetlib.dhp.collection.CollectionUtils
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
@@ -37,7 +38,7 @@ object SparkEBILinksToOaf {
     ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
       .filter(p => BioDBToOAF.EBITargetLinksFilter(p))
       .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
-      .flatMap(i => CollectionUtils.fixRelations(i)).filter(i => i != null)
+      .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null)
       .write.mode(SaveMode.Overwrite).save(targetPath)
   }
 }
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
similarity index 100%
rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
similarity index 96%
rename from dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
index d5d40ecfe..ecef32202 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
@@ -4,9 +4,10 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
 import eu.dnetlib.dhp.schema.oaf._
+import scala.collection.JavaConverters._
 
 import java.util.regex.Pattern
-import collection.JavaConverters._
+
 /**
   */
@@ -21,10 +22,10 @@ object PubMedToOaf {
 
   val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
 
+
   /**
    * Cleaning the DOI Applying regex in order to
    * remove doi starting with URL
-   *
    * @param doi input DOI
    * @return cleaned DOI
    */
@@ -48,7 +49,7 @@
    * starting from OAF instanceType value
    *
    * @param cobjQualifier OAF instance type
-   * @param vocabularies All dnet vocabularies
+   * @param vocabularies  All dnet vocabularies
    * @return the correct instance
    */
   def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = {
@@ -64,7 +65,7 @@ object PubMedToOaf {
   }
 
   /**
-   * Mapping the Pubmedjournal info into the OAF Journale
+   * Mapping the Pubmedjournal info into the OAF Journale
    *
    * @param j the pubmedJournal
    * @return the OAF Journal
@@ -90,8 +91,9 @@ object PubMedToOaf {
    * Find vocabulary term into synonyms and term in the vocabulary
    *
    * @param vocabularyName the input vocabulary name
-   * @param vocabularies all the vocabularies
-   * @param term the term to find
+   * @param vocabularies   all the vocabularies
+   * @param term           the term to find
+   *
    * @return the cleaned term value
    */
   def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = {
@@ -102,9 +104,10 @@ object PubMedToOaf {
 
   /**
-   * Map the Pubmed Article into the OAF instance
+   * Map the Pubmed Article into the OAF instance
    *
-   * @param article the pubmed articles
+   *
+   * @param article      the pubmed articles
    * @param vocabularies the vocabularies
    * @return The OAF instance if the mapping did not fail
    */
@@ -182,6 +185,7 @@ object PubMedToOaf {
 
     //--------------------------------------------------------------------------------------
+
     // RESULT MAPPING
     //--------------------------------------------------------------------------------------
     result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
similarity index 100%
rename from dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala
similarity index 100%
rename from dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala
rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json
similarity index 100%
rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/datacite/record.json
rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json
diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java
index 52e9917bb..45bfc785f 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java
@@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
 import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
 
 @ExtendWith(MockitoExtension.class)
-public class UpdateMatcherTest {
+class UpdateMatcherTest {
 
 	UpdateMatcher matcher = new EnrichMissingPublicationDate();
diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java
index 5af81a31a..550ded9f4 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java
@@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test;
 
 import eu.dnetlib.broker.objects.OaBrokerMainEntity;
 
-public class EnrichMissingPublicationDateTest {
+class EnrichMissingPublicationDateTest {
 
 	final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate();
diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java
index d93390e4a..b532aa9f7 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java
@@ -8,7 +8,7 @@ import java.util.Arrays;
 
 import org.junit.jupiter.api.Test;
 
-public class SubscriptionUtilsTest {
+class SubscriptionUtilsTest {
 
 	@Test
 	void testVerifyListSimilar() {
diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
index 117bdeef4..a8bc03e31 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
@@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor;
 import eu.dnetlib.broker.objects.OaBrokerMainEntity;
 import eu.dnetlib.broker.objects.OaBrokerTypedValue;
 
-public class TrustUtilsTest {
+class TrustUtilsTest {
 
 	private static final double THRESHOLD = 0.95;
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
similarity index 99%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
index edca4a180..1b1c850ba 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@@ -4,19 +4,20 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
 import eu.dnetlib.dhp.utils.DHPUtils
-import eu.dnetlib.doiboost.DoiBoostMappingUtil
-import eu.dnetlib.doiboost.DoiBoostMappingUtil._
+import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _}
 import org.apache.commons.lang.StringUtils
 import org.json4s
 import org.json4s.DefaultFormats
-import org.json4s.JsonAST._
+import org.json4s.JsonAST.{JValue, _}
 import org.json4s.jackson.JsonMethods._
 import org.slf4j.{Logger, LoggerFactory}
 
-import java.util
 import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.util.matching.Regex
+import java.util
+
+import eu.dnetlib.doiboost.DoiBoostMappingUtil
 
 case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
similarity index 77%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
index 6a1c701af..159b817c7 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
@@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils
 import org.apache.hadoop.io.{IntWritable, Text}
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.expressions.Aggregator
-import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
@@ -17,12 +17,12 @@ object CrossrefDataset {
 
   val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
 
-  def to_item(input: String): CrossrefDT = {
+  def to_item(input:String):CrossrefDT = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
-    val ts: Long = (json \ "indexed" \ "timestamp").extract[Long]
-    val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
+    val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
+    val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
     CrossrefDT(doi, input, ts)
   }
@@ -30,6 +30,7 @@ object CrossrefDataset {
 
   def main(args: Array[String]): Unit = {
 
+
     val conf: SparkConf = new SparkConf()
     val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json")))
     parser.parseArgument(args)
@@ -53,7 +54,7 @@ object CrossrefDataset {
         return b
 
-      if (a.timestamp > b.timestamp) {
+      if(a.timestamp >b.timestamp) {
         return a
       }
       b
@@ -65,7 +66,7 @@ object CrossrefDataset {
       if (a == null)
         return b
 
-      if (a.timestamp > b.timestamp) {
+      if(a.timestamp >b.timestamp) {
         return a
       }
       b
@@ -78,20 +79,20 @@ object CrossrefDataset {
     override def finish(reduction: CrossrefDT): CrossrefDT = reduction
   }
 
-  val workingPath: String = parser.get("workingPath")
+  val workingPath:String = parser.get("workingPath")
 
-  val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT]
+  val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT]
 
   val update =
-    spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text])
-      .map(i => CrossrefImporter.decompressBlob(i._2.toString))
-      .map(i => to_item(i)))
+    spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text])
+      .map(i =>CrossrefImporter.decompressBlob(i._2.toString))
+      .map(i =>to_item(i)))
 
   main_ds.union(update).groupByKey(_.doi)
     .agg(crossrefAggregator.toColumn)
-    .map(s => s._2)
+    .map(s=>s._2)
     .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated")
 }
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefImporter.java
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ESClient.java
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
similarity index 73%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
index 6d03abc25..526ff7b3a 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala
@@ -2,12 +2,17 @@ package eu.dnetlib.doiboost.crossref
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
+import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
+import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
+import org.apache.hadoop.io.{IntWritable, Text}
+import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s
 import org.json4s.DefaultFormats
-import org.json4s.jackson.JsonMethods.parse
+import org.json4s.JsonAST.JArray
+import org.json4s.jackson.JsonMethods.{compact, parse, render}
 import org.slf4j.{Logger, LoggerFactory}
 
 import scala.io.Source
@@ -19,10 +24,11 @@ object GenerateCrossrefDataset {
 
   implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT]
 
+
   def crossrefElement(meta: String): CrossrefDT = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(meta)
-    val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
+    val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
     val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
     CrossrefDT(doi, meta, timestamp)
@@ -45,14 +51,14 @@ object GenerateCrossrefDataset {
 
     import spark.implicits._
 
-    val tmp: RDD[String] = sc.textFile(sourcePath, 6000)
+    val tmp : RDD[String] = sc.textFile(sourcePath,6000)
 
     spark.createDataset(tmp)
       .map(entry => crossrefElement(entry))
       .write.mode(SaveMode.Overwrite).save(targetPath)
-    // .map(meta => crossrefElement(meta))
-    // .toDS.as[CrossrefDT]
-    // .write.mode(SaveMode.Overwrite).save(targetPath)
+//    .map(meta => crossrefElement(meta))
+//    .toDS.as[CrossrefDT]
+//    .write.mode(SaveMode.Overwrite).save(targetPath)
 
   }
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
similarity index 96%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
index fa55b9fb9..c65916610 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
@@ -4,8 +4,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.schema.oaf
 import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
 import org.apache.commons.io.IOUtils
+
 import org.apache.spark.SparkConf
-import org.apache.spark.sql._
+
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala
similarity index 88%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala
index 191c4587e..95ecb568b 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala
@@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import org.apache.hadoop.io.compress.GzipCodec
-import org.apache.spark.sql.SparkSession
 import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.JsonAST.JArray
@@ -17,7 +17,9 @@ object UnpackCrtossrefEntries {
 
   val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass)
 
-  def extractDump(input: String): List[String] = {
+
+
+  def extractDump(input:String):List[String] = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
@@ -28,6 +30,7 @@ object UnpackCrtossrefEntries {
 
   }
 
+
   def main(args: Array[String]): Unit = {
     val conf = new SparkConf
     val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString)
@@ -42,7 +45,7 @@ object UnpackCrtossrefEntries {
       .getOrCreate()
     val sc: SparkContext = spark.sparkContext
 
-    sc.wholeTextFiles(sourcePath, 6000).flatMap(d => extractDump(d._2))
+    sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2))
       .saveAsTextFile(targetPath, classOf[GzipCodec])
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala
index 0a6fa00f0..fd9629024 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala
@@ -5,10 +5,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
 import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty}
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
-import eu.dnetlib.doiboost.DoiBoostMappingUtil._
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
+import eu.dnetlib.doiboost.DoiBoostMappingUtil._
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
similarity index 98%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
index d25a4893f..a68d0bb2d 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
@@ -3,8 +3,8 @@ package eu.dnetlib.doiboost.mag
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.types._
 import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.apache.spark.sql.types._
 import org.slf4j.{Logger, LoggerFactory}
 
 object SparkImportMagIntoDataset {
@@ -24,13 +24,13 @@ object SparkImportMagIntoDataset {
     "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
     "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")),
     "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
+    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
     "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
     "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")),
     "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")),
     "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")),
     "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")),
     "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")),
     "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")),
     "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")),
@@ -75,6 +75,7 @@ object SparkImportMagIntoDataset {
       .master(parser.get("master")).getOrCreate()
 
+
     stream.foreach { case (k, v) =>
       val s: StructType = getSchema(k)
       val df = spark.read
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala
similarity index 91%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala
index 932725446..016279787 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala
@@ -5,16 +5,19 @@ import eu.dnetlib.dhp.schema.oaf.Publication
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.functions.{col, collect_list, struct}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.functions._
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
+
 import scala.collection.JavaConverters._
+
 object SparkProcessMAG {
-  def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = {
+  def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={
     d.where(col("Doi").isNotNull)
       .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
-      .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2))
+      .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
       .map(_._2)(Encoders.product[MagPapers])
       .map(mp => {
         new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi),
@@ -95,13 +98,13 @@ object SparkProcessMAG {
 
     var magPubs: Dataset[(String, Publication)] =
       spark.read.load(s"$workingPath/merge_step_2").as[Publication]
-        .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
+      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
 
     val conference = spark.read.load(s"$sourcePath/ConferenceInstances")
-      .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate", $"EndDate")
+      .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" )
 
     val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci")))
-      .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate", $"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance]
+      .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance]
 
     magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left")
@@ -119,7 +122,7 @@ object SparkProcessMAG {
 
     magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left")
       .map(item => ConversionUtil.updatePubsWithDescription(item)
-      ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4")
+    ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4")
 
     logger.info("Phase 7) Enrich Publication with FieldOfStudy")
@@ -145,10 +148,11 @@ object SparkProcessMAG {
     spark.read.load(s"$workingPath/mag_publication").as[Publication]
       .filter(p => p.getId == null)
       .groupByKey(p => p.getId)
-      .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b))
+      .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
       .map(_._2)
       .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication")
 
+
   }
 }
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
similarity index 98%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
index 11031f9ca..1cd3f7028 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
@@ -4,16 +4,17 @@ import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
 import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
+import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI}
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
 import org.apache.commons.lang.StringUtils
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.collection.JavaConverters._
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.JsonAST._
 import org.json4s.jackson.JsonMethods._
-import org.slf4j.{Logger, LoggerFactory}
-
-import scala.collection.JavaConverters._
 
 case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){}
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
similarity index 84%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
index 1b189e296..fa4a93e00 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
@@ -11,10 +11,10 @@ object SparkConvertORCIDToOAF {
   val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
 
-  def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = {
+  def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = {
     implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
     import spark.implicits._
-    val dataset: Dataset[ORCIDItem] = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem]
+    val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem]
 
     logger.info("Converting ORCID to OAF")
     dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath)
@@ -35,8 +35,8 @@ object SparkConvertORCIDToOAF {
     val workingPath = parser.get("workingPath")
     val targetPath = parser.get("targetPath")
 
-    run(spark, workingPath, targetPath)
+    run(spark,workingPath, targetPath)
 
   }
 
-}
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala
similarity index 67%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala
index 153be5dd1..31f331912 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala
@@ -1,45 +1,48 @@
 package eu.dnetlib.doiboost.orcid
 
+import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.oa.merge.AuthorMerger
 import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.functions.{col, collect_list}
-import org.apache.spark.sql._
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 
 object SparkPreprocessORCID {
   val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
 
-  def fixORCIDItem(item: ORCIDItem): ORCIDItem = {
-    ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList)
+  def fixORCIDItem(item :ORCIDItem):ORCIDItem = {
+    ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList)
   }
 
-  def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = {
+  def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = {
     import spark.implicits._
     implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
 
-    val inputRDD: RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s != null).filter(s => ORCIDToOAF.authorValid(s))
+    val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s))
 
     spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author")
 
-    val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s != null)
+    val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null)
 
     spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works")
 
-    val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor]
+    val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor]
 
-    val works: Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork]
+    val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork]
 
     works.joinWith(authors, authors("oid").equalTo(works("oid")))
-      .map(i => {
+      .map(i =>{
         val doi = i._1.doi
         val author = i._2
-        (doi, author)
-      }).groupBy(col("_1").alias("doi"))
+        (doi, author)
+      }).groupBy(col("_1").alias("doi"))
       .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem]
       .map(s => fixORCIDItem(s))
       .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor")
@@ -64,4 +67,4 @@ object SparkPreprocessORCID {
 
   }
 
-}
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala
similarity index 80%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala
index 70290018d..4530926f1 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala
@@ -1,14 +1,16 @@
 package eu.dnetlib.doiboost.uw
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+
 import eu.dnetlib.dhp.schema.oaf.Publication
 import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql._
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 
+
 object SparkMapUnpayWallToOAF {
 
   def main(args: Array[String]): Unit = {
@@ -30,11 +32,11 @@ object SparkMapUnpayWallToOAF {
     val sourcePath = parser.get("sourcePath")
     val targetPath = parser.get("targetPath")
 
-    val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath")
+    val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath")
 
     logger.info("Converting UnpayWall to OAF")
 
-    val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)).as[Publication]
+    val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication]
     d.write.mode(SaveMode.Overwrite).save(targetPath)
   }
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala
similarity index 98%
rename from dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala
rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala
index bf5694965..c8324cde1 100644
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala
@@ -4,13 +4,14 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
 import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication}
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
-import eu.dnetlib.doiboost.DoiBoostMappingUtil._
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
 import org.slf4j.{Logger, LoggerFactory}
 
 import scala.collection.JavaConverters._
+import eu.dnetlib.doiboost.DoiBoostMappingUtil._
+import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala
similarity index 98%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala
index 049ac37f4..4912648be 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/DoiBoostHostedByMapTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala
@@ -1,4 +1,4 @@
-package eu.dnetlib.doiboost
+package eu.dnetlib.dhp.doiboost
 
 import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset}
 import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType}
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala
similarity index 96%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala
index bdf845f19..a9a841ee9 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/NormalizeDoiTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala
@@ -1,4 +1,4 @@
-package eu.dnetlib.doiboost
+package eu.dnetlib.dhp.doiboost
 
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import org.junit.jupiter.api.Test
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
index 7403e103e..46d4ec08d 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
@@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.mag
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Dataset, SparkSession}
 import org.codehaus.jackson.map.ObjectMapper
-import org.json4s.DefaultFormats
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
+import org.json4s.DefaultFormats
 import org.slf4j.{Logger, LoggerFactory}
 
 import java.sql.Timestamp
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
similarity index 99%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
index a5ce6296c..b484dc087 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
@@ -10,9 +10,10 @@ import org.junit.jupiter.api.io.TempDir
 import org.slf4j.{Logger, LoggerFactory}
 
 import java.nio.file.Path
-import scala.collection.JavaConversions._
 import scala.io.Source
 
+import scala.collection.JavaConversions._
+
 class MappingORCIDToOAFTest {
   val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
   val mapper = new ObjectMapper()
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala
similarity index 100%
rename from dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala
rename to dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala
index 012ed3da0..fa696fffc 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala
@@ -3,11 +3,11 @@ package eu.dnetlib.doiboost.uw
 
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute
-import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
-import org.slf4j.{Logger, LoggerFactory}
 
 import scala.io.Source
+import org.junit.jupiter.api.Assertions._
+import org.slf4j.{Logger, LoggerFactory}
 
 class UnpayWallMappingTest {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
index ad4e1c96e..ce383292c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
@@ -1,8 +1,8 @@
 package eu.dnetlib.dhp.oa.graph.hostedbymap
 
 import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
-import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn}
+import org.apache.spark.sql.expressions.Aggregator
 
 case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
similarity index 81%
rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
index 38af3eee4..1b18ba3ae 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
@@ -2,12 +2,13 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap
 
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass}
 import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
 import eu.dnetlib.dhp.schema.common.ModelConstants
-import eu.dnetlib.dhp.schema.oaf.Datasource
+import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication}
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
-import org.apache.spark.sql._
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s.DefaultFormats
 import org.slf4j.{Logger, LoggerFactory}
@@ -51,18 +52,18 @@ object SparkApplyHostedByMapToDatasource {
 
     val mapper = new ObjectMapper()
 
-    val dats: Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource")
+    val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource")
       .map(r => mapper.readValue(r, classOf[Datasource]))
 
-    val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(preparedInfoPath)
+    val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath)
       .map(ei => mapper.readValue(ei, classOf[EntityInfo])))
 
-    applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath)
+    applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
 
     spark.read.textFile(outputPath)
       .write
       .mode(SaveMode.Overwrite)
-      .option("compression", "gzip")
+      .option("compression","gzip")
       .text(graphPath + "/datasource")
   }
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
similarity index 85%
rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
index d360da2e9..0e047d016 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
@@ -5,13 +5,16 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
-import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication}
+import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication}
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
-import org.apache.spark.sql._
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s.DefaultFormats
 import org.slf4j.{Logger, LoggerFactory}
+
 import scala.collection.JavaConverters._
+
+
 object SparkApplyHostedByMapToResult {
 
   def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = {
@@ -36,7 +39,6 @@ object SparkApplyHostedByMapToResult {
           p
         })(Encoders.bean(classOf[Publication]))
   }
-
   def main(args: Array[String]): Unit = {
@@ -65,18 +67,18 @@ object SparkApplyHostedByMapToResult {
     implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo])
     val mapper = new ObjectMapper()
 
-    val pubs: Dataset[Publication] = spark.read.textFile(graphPath + "/publication")
+    val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication")
       .map(r => mapper.readValue(r, classOf[Publication]))
 
-    val pinfo: Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath)
-      .map(ei => mapper.readValue(ei, classOf[EntityInfo]))
+    val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath)
+        .map(ei => mapper.readValue(ei, classOf[EntityInfo]))
 
-    applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath)
+    applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
 
     spark.read.textFile(outputPath)
       .write
       .mode(SaveMode.Overwrite)
-      .option("compression", "gzip")
+      .option("compression","gzip")
       .text(graphPath + "/publication")
   }
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
similarity index 74%
rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
index 87e203e4b..b7a7d352f 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
@@ -3,58 +3,61 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
+
 import eu.dnetlib.dhp.schema.oaf.{Journal, Publication}
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
-import org.apache.spark.sql._
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
 import org.slf4j.{Logger, LoggerFactory}
 
+
+
 object SparkPrepareHostedByInfoToApply {
 
   implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo])
 
-  def getList(id: String, j: Journal, name: String): List[EntityInfo] = {
-    var lst: List[EntityInfo] = List()
+  def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = {
+    var lst:List[EntityInfo] = List()
 
-    if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) {
+    if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){
       lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst
     }
-    if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) {
+    if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){
      lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst
    }
-    if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) {
+    if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){
      lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst
    }
    lst
  }
 
-  def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = {
+  def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = {
     implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication])
 
     val mapper = new ObjectMapper()
 
-    val dd: Dataset[Publication] = spark.read.textFile(publicationPath)
+    val dd : Dataset[Publication] = spark.read.textFile(publicationPath)
       .map(r => mapper.readValue(r, classOf[Publication]))
 
-    dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, ""))
+    dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, ""))
 
   }
 
-  def toEntityInfo(input: String): EntityInfo = {
+  def toEntityInfo(input:String): EntityInfo = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
-    val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]]
+    val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]]
     toEntityItem(c.keys.head, c.values.head)
   }
 
-  def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = {
+  def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = {
 
     EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess)
@@ -64,7 +67,7 @@ object SparkPrepareHostedByInfoToApply {
     Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left")
       .map(t2 => {
         val res: EntityInfo = t2._1
-        if (t2._2 != null) {
+        if(t2._2 != null ){
           val ds = t2._2
           res.setHostedById(ds.getId)
           res.setOpenAccess(ds.getOpenAccess)
@@ -104,10 +107,10 @@ object SparkPrepareHostedByInfoToApply {
 
     //STEP1: read the hostedbymap and transform it in EntityInfo
-    val hostedByInfo: Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo)
+    val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo)
 
-    //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn)
-    val resultInfoDataset: Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication")
+    //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn)
+    val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication")
 
     //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just
     //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map)
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
similarity index 61%
rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
index 6dfe35623..1ee1d5d1a 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
@@ -1,39 +1,41 @@
 package eu.dnetlib.dhp.oa.graph.hostedbymap
 
-import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel}
 import eu.dnetlib.dhp.schema.oaf.Datasource
 import org.apache.commons.io.IOUtils
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
-import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.json4s.DefaultFormats
 import org.slf4j.{Logger, LoggerFactory}
-
+import com.fasterxml.jackson.databind.ObjectMapper
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
 import java.io.PrintWriter
 
+import org.apache.hadoop.io.compress.GzipCodec
+
+
 object SparkProduceHostedByMap {
 
   implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])
 
-  def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = {
+  def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = {
     val openaire: HostedByInfo = input._1._1
     val doaj: HostedByInfo = input._1._2
     val gold: HostedByInfo = input._2
     val isOpenAccess: Boolean = doaj == null && gold == null
 
     openaire.journal_id match {
-      case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess)
-      case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess)
-      case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess)
+      case Constants.ISSN  => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess)
+      case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess)
+      case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess)
 
       // catch the default with a variable so you can print it
-      case whoa => null
+      case whoa            => null
     }
   }
@@ -42,7 +44,7 @@ object SparkProduceHostedByMap {
 
     implicit val formats = org.json4s.DefaultFormats
 
-    val map: Map[String, HostedByItemType] = Map(input._1 -> input._2)
+    val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 )
 
     Serialization.write(map)
@@ -50,33 +52,34 @@ object SparkProduceHostedByMap {
   }
 
-  def getHostedByItemType(id: String, officialname: String, issn: String, eissn: String, issnl: String, oa: Boolean): HostedByItemType = {
-    if (issn != null) {
-      if (eissn != null) {
-        if (issnl != null) {
-          HostedByItemType(id, officialname, issn, eissn, issnl, oa)
-        } else {
-          HostedByItemType(id, officialname, issn, eissn, "", oa)
+
+  def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = {
+    if(issn != null){
+      if(eissn != null){
+        if(issnl != null){
+          HostedByItemType(id, officialname, issn, eissn, issnl , oa)
+        }else{
+          HostedByItemType(id, officialname, issn, eissn, "" , oa)
         }
-      } else {
-        if (issnl != null) {
-          HostedByItemType(id, officialname, issn, "", issnl, oa)
-        } else {
-          HostedByItemType(id, officialname, issn, "", "", oa)
+      }else{
+        if(issnl != null){
+          HostedByItemType(id, officialname, issn, "", issnl , oa)
+        }else{
+          HostedByItemType(id, officialname, issn, "", "" , oa)
         }
       }
-    } else {
-      if (eissn != null) {
-        if (issnl != null) {
-          HostedByItemType(id, officialname, "", eissn, issnl, oa)
-        } else {
-          HostedByItemType(id, officialname, "", eissn, "", oa)
+    }else{
+      if(eissn != null){
+        if(issnl != null){
+          HostedByItemType(id, officialname, "", eissn, issnl , oa)
+        }else{
+          HostedByItemType(id, officialname, "", eissn, "" , oa)
        }
-      } else {
-        if (issnl != null) {
-          HostedByItemType(id, officialname, "", "", issnl, oa)
-        } else {
-          HostedByItemType("", "", "", "", "", oa)
+      }else{
+        if(issnl != null){
+          HostedByItemType(id, officialname, "", "", issnl , oa)
+        }else{
+          HostedByItemType("", "", "", "", "" , oa)
         }
       }
     }
@@ -87,10 +90,10 @@ object SparkProduceHostedByMap {
       return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false)
     }
-    HostedByItemType("", "", "", "", "", false)
+    HostedByItemType("","","","","",false)
   }
 
-  def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
+  def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = {
 
     import spark.implicits._
@@ -99,10 +102,10 @@ object SparkProduceHostedByMap {
 
     implicit var encoderD = Encoders.kryo[Datasource]
 
-    val dd: Dataset[Datasource] = spark.read.textFile(datasourcePath)
+    val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath)
       .map(r => mapper.readValue(r, classOf[Datasource]))
 
-    dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals("")))
+    dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals("")))
 
   }
@@ -112,17 +115,17 @@ object SparkProduceHostedByMap {
 
   }
 
-  def goldHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
+  def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = {
 
     import spark.implicits._
 
     implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel]
 
     val mapper = new ObjectMapper()
 
-    val dd: Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath)
+    val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath)
      .map(r => mapper.readValue(r, classOf[UnibiGoldModel]))
 
-    dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals("")))
+    dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals("")))
 
   }
@@ -131,40 +134,41 @@
return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) } - def doajHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { import spark.implicits._ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] val mapper = new ObjectMapper() - val dd: Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) .map(r => mapper.readValue(r, classOf[DOAJModel])) - dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) } def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst: List[(String, HostedByItemType)] = List() - if (!input.issn.equals("")) { + var lst : List[(String, HostedByItemType)] = List() + if(!input.issn.equals("")){ lst = (input.issn, input) :: lst } - if (!input.eissn.equals("")) { + if(!input.eissn.equals("")){ lst = (input.eissn, input) :: lst } - if (!input.lissn.equals("")) { + if(!input.lissn.equals("")){ lst = (input.lissn, input) :: lst } lst } - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { val conf = new Configuration() conf.set("fs.defaultFS", hdfsNameNode) - val fs = FileSystem.get(conf) + val fs= FileSystem.get(conf) val output = fs.create(new Path(outputPath)) val writer = new PrintWriter(output) try { @@ -178,6 +182,7 @@ object SparkProduceHostedByMap { } + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -208,7 +213,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath, classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala similarity index 88% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 0179cc266..c7ad1890d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -4,14 +4,20 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.commons.io.IOUtils +import org.apache.commons.lang3.StringUtils +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import 
org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory -import scala.io.Source import scala.collection.JavaConverters._ +import scala.io.Source + object CopyHdfsOafSparkApplication { def main(args: Array[String]): Unit = { @@ -53,7 +59,7 @@ object CopyHdfsOafSparkApplication { if (validPaths.nonEmpty) { val oaf = spark.read.load(validPaths: _*).as[Oaf] val mapper = new ObjectMapper() - val l = ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList l.foreach( e => oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala similarity index 79% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 6b4a501d6..316b8afed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils @@ -13,7 +14,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -50,10 +51,10 @@ object SparkResolveEntities { fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) } - } +} - def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { +def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -70,22 +71,22 @@ object SparkResolveEntities { } - def deserializeObject(input: String, entity: EntityType): Result = { + def deserializeObject(input:String, entity:EntityType ) :Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software => mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software=> mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def 
generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String) = { + def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ - val re: Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] entities.foreach { e => diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index c7f9b2d0e..cd517dd5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.Relation +import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala new file mode 100644 index 000000000..9e905d806 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.oa.sx.graphimport + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession + +object SparkDataciteToOAF { + + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + import spark.implicits._ + + + val sc = spark.sparkContext + + val inputPath = parser.get("inputPath") + + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 69% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 9d16cf907..3ee0c7dd6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -29,13 +29,13 @@ object SparkConvertDatasetToJsonRDD { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject = List("publication", "dataset", "software", "otherResearchProduct") + val resultObject = List("publication","dataset","software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - resultObject.foreach { item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r => mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach{item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index cc1b97fd6..846ac37af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,10 +5,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} +import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { @@ -32,8 +32,8 @@ object SparkConvertObjectToJson { log.info(s"objectType -> $objectType") - implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] + implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +42,11 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => 
log.info("Serialize Summary") val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) + d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(1000).saveAsTextFile(targetPath, classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 2eb5e3a35..4b82fe645 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -2,12 +2,11 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} - object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { @@ -32,39 +31,39 @@ object SparkConvertRDDtoDataset { val entityPath = s"$t/entities" val relPath = s"$t/relation" val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) log.info("Converting dataset") - val rddDataset = spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") log.info("Converting publication") - val rddPublication = spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) 
spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") log.info("Converting software") - val rddSoftware = spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") log.info("Converting otherresearchproduct") - val rddOtherResearchProduct = spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") log.info("Converting Relation") - val relationSemanticFilter = List("cites", "iscitedby", "merges", "ismergedin") + val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") - val rddRelation = spark.sparkContext.textFile(s"$sourcePath/relation") + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index b6f678967..350b00c5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} + + object SparkCreateInputGraph { def main(args: Array[String]): Unit = { @@ -31,7 +33,7 @@ object SparkCreateInputGraph { ) - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) @@ -39,13 +41,16 @@ object SparkCreateInputGraph { implicit val relEncoder: Encoder[Relation] 
= Encoders.kryo(classOf[Relation]) + + + val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] log.info("Extract Publication") @@ -65,27 +70,27 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}", s"$targetPath/preprocess/${r._1}", spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) } } - def extractEntities[T <: Oaf](oafDs: Dataset[Oaf], targetPath: String, clazz: Class[T], log: Logger): Unit = { + def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) log.info(s"Extract ${clazz.getSimpleName}") oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) } - def makeDatasetUnique[T <: Result](sourcePath: String, targetPath: String, spark: SparkSession, clazz: Class[T]): Unit = { + def makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { import spark.implicits._ - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) + implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - val ds: Dataset[T] = spark.read.load(sourcePath).as[T] + val ds:Dataset[T] = spark.read.load(sourcePath).as[T] - ds.groupByKey(_.getId).reduceGroups { (x, y) => + ds.groupByKey(_.getId).reduceGroups{(x,y) => x.mergeFrom(y) x }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index 9930c57af..e4fcd2782 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.count -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkCreateScholix { @@ -42,7 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] @@ -51,54 +51,54 @@ object SparkCreateScholix { 
relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1 != null && input._2 != null) { + if (input._1!= null && input._2!= null) { val rel: Relation = input._1._2 val source: ScholixSummary = input._2._2 (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) } - else null + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r != null) + .filter(r => r!= null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2 == null) { + if (input._2== null) { null } else { val s: Scholix = input._1._2 val target: ScholixSummary = input._2._2 ScholixUtils.generateCompleteScholix(s, target) } - }.filter(s => s != null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") + }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) .groupByKey(_._1) .agg(ScholixUtils.scholixAggregator.toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + val scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - val stats: Dataset[(String, String, Long)] = scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String, String, Long)] + val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0)) + .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset + b.relatedDataset, a.relatedPublication + b.relatedPublication)) + .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) + val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map { i => + relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i 
=> val re = i._1 val sum = i._2._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 4274cae5a..0970375f5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -28,15 +28,15 @@ object SparkCreateSummaryObject { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds: Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s != null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala index c70397d04..193512474 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -5,7 +5,6 @@ import org.apache.spark.sql.{Encoder, Encoders} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse - import java.util.regex.Pattern import scala.language.postfixOps import scala.xml.{Elem, Node, XML} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala similarity index 83% rename from 
dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index f1a4553ea..79c75d6df 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,12 +2,13 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import scala.io.Source import scala.collection.JavaConverters._ +import scala.io.Source + object SparkGeneratePanagaeaDataset { @@ -27,17 +28,17 @@ object SparkGeneratePanagaeaDataset { parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc: SparkContext = spark.sparkContext + val sc:SparkContext = spark.sparkContext - val workingPath: String = parser.get("workingPath") + val workingPath:String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - val inputRDD: RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") @@ -45,4 +46,7 @@ object SparkGeneratePanagaeaDataset { } + + + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala similarity index 61% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 7b1ddbb8f..93c554e04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,5 +1,6 @@ package eu.dnetlib.dhp.sx.graph.scholix + import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} @@ -10,23 +11,22 @@ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import scala.io.Source import scala.collection.JavaConverters._ - +import scala.io.Source +import scala.language.postfixOps object ScholixUtils { val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - val DATE_RELATION_KEY: String = "RelationDate" + val 
DATE_RELATION_KEY:String = "RelationDate" + case class RelationVocabulary(original:String, inverse:String){} - case class RelationVocabulary(original: String, inverse: String) {} + case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} - case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} - - val relations: Map[String, RelationVocabulary] = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString + val relations:Map[String, RelationVocabulary] = { + val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) @@ -35,12 +35,12 @@ object ScholixUtils { } - def extractRelationDate(relation: Relation): String = { + def extractRelationDate(relation: Relation):String = { - if (relation.getProperties == null || !relation.getProperties.isEmpty) + if (relation.getProperties== null || !relation.getProperties.isEmpty) null else { - val date = relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) + val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) if (date.isDefined) date.get else @@ -48,9 +48,9 @@ object ScholixUtils { } } - def extractRelationDate(summary: ScholixSummary): String = { + def extractRelationDate(summary: ScholixSummary):String = { - if (summary.getDate == null || summary.getDate.isEmpty) + if(summary.getDate== null || summary.getDate.isEmpty) null else { summary.getDate.get(0) @@ -59,14 +59,15 @@ object ScholixUtils { } - def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { + def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) } - val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { + + val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { override def zero: RelatedEntities = null override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { @@ -77,16 +78,17 @@ object ScholixUtils { if (b == null) RelatedEntities(a._1, relatedDataset, relatedPublication) else - RelatedEntities(a._1, b.relatedDataset + relatedDataset, b.relatedPublication + relatedPublication) + RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) } override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1 != null && b2 != null) - RelatedEntities(b1.id, b1.relatedDataset + b2.relatedDataset, b1.relatedPublication + b2.relatedPublication) + if (b1!= null && b2!= null) + RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) - else if (b1 != null) - b1 else + if (b1!= null) + b1 + else b2 } @@ -102,12 +104,12 @@ object ScholixUtils { override def zero: Scholix = null - def scholix_complete(s: Scholix): Boolean = { - if (s == null || s.getIdentifier == null) { + def scholix_complete(s:Scholix):Boolean ={ + if (s== null || s.getIdentifier==null) { false } else if (s.getSource == null || 
s.getTarget == null) { - false - } + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) false else @@ -119,7 +121,7 @@ object ScholixUtils { } override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 + if (scholix_complete(b1)) b1 else b2 } override def finish(reduction: Scholix): Scholix = reduction @@ -130,7 +132,7 @@ object ScholixUtils { } - def createInverseScholixRelation(scholix: Scholix): Scholix = { + def createInverseScholixRelation(scholix: Scholix):Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -142,33 +144,34 @@ object ScholixUtils { s + } - def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { + def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) }(collection.breakOut) - l + l } else List() } - def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { + def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava) + new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) }(collection breakOut) l } else List() } - def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { + def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { val s = new Scholix s.setPublicationDate(scholix.getPublicationDate) s.setPublisher(scholix.getPublisher) @@ -189,28 +192,29 @@ object ScholixUtils { r.setObjectType(summaryObject.getTypology.toString) r.setObjectSubType(summaryObject.getSubType) - if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) - r.setTitle(summaryObject.getTitle.get(0)) + if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) - if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { - val l: List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList + if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ + val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList if (l.nonEmpty) r.setCreator(l.asJava) } - if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) + if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { - val plist: List[ScholixEntityId] = summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) + { + val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList if (plist.nonEmpty) 
r.setPublisher(plist.asJava) } - if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { + if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { - val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( + val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) , "collected", "complete" @@ -224,9 +228,12 @@ object ScholixUtils { } - def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { - if (relation == null || source == null) + + + def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { + + if (relation== null || source== null) return null val s = new Scholix @@ -246,9 +253,9 @@ object ScholixUtils { s.setPublicationDate(d) - if (source.getPublisher != null && !source.getPublisher.isEmpty) { + if (source.getPublisher!= null && !source.getPublisher.isEmpty) { val l: List[ScholixEntityId] = source.getPublisher.asScala - .map { + .map{ p => new ScholixEntityId(p, null) }(collection.breakOut) @@ -258,7 +265,7 @@ object ScholixUtils { } val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation == null) + if (semanticRelation== null) return null s.setRelationship(new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) s.setSource(generateScholixResourceFromSummary(source)) @@ -267,8 +274,8 @@ object ScholixUtils { } - def findURLForPID(pidValue: List[StructuredProperty], urls: List[String]): List[(StructuredProperty, String)] = { - pidValue.map { + def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { + pidValue.map{ p => val pv = p.getValue @@ -278,67 +285,67 @@ object ScholixUtils { } - def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { + def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { if (r.getInstance() == null || r.getInstance().isEmpty) return List() - r.getInstance().asScala.filter(i => i.getUrl != null && !i.getUrl.isEmpty) - .filter(i => i.getPid != null && i.getUrl != null) + r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) + .filter(i => i.getPid!= null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } - def resultToSummary(r: Result): ScholixSummary = { + def resultToSummary(r:Result):ScholixSummary = { val s = new ScholixSummary s.setId(r.getId) if (r.getPid == null || r.getPid.isEmpty) return null - val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication]) + if (r.isInstanceOf[Publication] ) s.setTypology(Typology.publication) else s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { - val titles: List[String] = r.getTitle.asScala.map(t => t.getValue)(collection breakOut) + if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { + val 
titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) if (titles.nonEmpty) s.setTitle(titles.asJava) else - return null + return null } - if (r.getAuthor != null && !r.getAuthor.isEmpty) { - val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname)(collection breakOut) + if(r.getAuthor!= null && !r.getAuthor.isEmpty) { + val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) if (authors nonEmpty) s.setAuthor(authors.asJava) } if (r.getInstance() != null) { - val dt: List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) + val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut) if (dt.nonEmpty) s.setDate(dt.distinct.asJava) } - if (r.getDescription != null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) + if (r.getDescription!= null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) if (d.isDefined) s.setDescription(d.get.getValue) } - if (r.getSubject != null && !r.getSubject.isEmpty) { - val subjects: List[SchemeValue] = r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) + if (r.getSubject!= null && !r.getSubject.isEmpty) { + val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) if (subjects.nonEmpty) s.setSubject(subjects.asJava) } - if (r.getPublisher != null) + if (r.getPublisher!= null) s.setPublisher(List(r.getPublisher.getValue).asJava) - if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { - val cf: List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) + if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { + val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) if (cf.nonEmpty) s.setDatasources(cf.distinct.asJava) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 96% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index 7abce547f..a3a753a8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,9 +3,13 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import 
eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 98% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 0922f2e19..5b00e9b6f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,9 +4,10 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.json4s.jackson.Serialization.write -import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index f1bd841d1..9a142d3c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -159,7 +159,6 @@ class ResolveEntitiesTest extends Serializable { val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) - val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala index 0d89cca85..b90827e81 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/pangaea/PangaeaTransformTest.scala @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.sx.pangaea import eu.dnetlib.dhp.sx.graph.pangaea.PangaeaUtils import org.junit.jupiter.api.Test +import java.util.TimeZone import java.text.SimpleDateFormat import java.util.Date import scala.io.Source diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset index 2c73183e2..05c875148 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -1,3 +1,3 @@ -{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} -{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} -{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number 
PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number 
PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file From 0c0d561bc47c588d0049fcab1dc8e400efe1a7b7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 09:54:22 +0100 Subject: [PATCH 14/44] added public class into tests to create correct javadoc --- .../eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java | 2 +- .../oa/matchers/simple/EnrichMissingPublicationDateTest.java | 2 +- .../eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java | 2 +- .../test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 45bfc785f..52e9917bb 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -class UpdateMatcherTest { +public class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 550ded9f4..5af81a31a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -class EnrichMissingPublicationDateTest { +public class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index b532aa9f7..d93390e4a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ 
b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -class SubscriptionUtilsTest { +public class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index a8bc03e31..117bdeef4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -class TrustUtilsTest { +public class TrustUtilsTest { private static final double THRESHOLD = 0.95; From fc03c99805c540d03b82ea269d4ff6b4f3eec8cd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 10:46:33 +0100 Subject: [PATCH 15/44] fixed javadoc URLs after deploying the site --- dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md index 00e3ed877..c1813394b 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -9,8 +9,8 @@ the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_description Parsing ------- -The resposible class of parsing is [PMParser](./scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser) that generates -an intermediate mapping of PubMed Article defined [here](/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) +The class responsible for parsing is [PMParser](/dnet-hadoop/scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser), which generates +an intermediate mapping of the PubMed article, defined [here](/dnet-hadoop/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) Mapping From 2b46b87f56a209c5ca91803920121b96a5535306 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 11:30:29 +0100 Subject: [PATCH 16/44] fixed filtering criteria applied in SparkCopyRelationsNoOpenorgs to keep the parent/child relations from OpenOrgs --- .../dhp/oa/dedup/AbstractSparkAction.java | 24 +++++++++++++++---- .../dedup/SparkCopyRelationsNoOpenorgs.java | 2 +- .../oa/dedup/SparkOpenorgsProvisionTest.java | 24 ++++++++++++++++--- .../openorgs/provision/relation/part-00000 | 2 ++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 6a9b21b00..136413376 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -139,14 +139,28 @@ abstract class AbstractSparkAction implements Serializable { protected boolean isOpenorgs(Relation rel) { return Optional .ofNullable(rel.getCollectedfrom()) .map( - c -> c - .stream() - .filter(Objects::nonNull) - .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) + .map(c -> 
isCollectedFromOpenOrgs(c)) .orElse(false); } + protected boolean isOpenorgsDedupRel(Relation rel) { + return isOpenorgs(rel) && isOpenOrgsDedupMergeRelation(rel); + } + + private boolean isCollectedFromOpenOrgs(List c) { + return c + .stream() + .filter(Objects::nonNull) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())); + } + + private boolean isOpenOrgsDedupMergeRelation(Relation rel) { + return ModelConstants.ORG_ORG_RELTYPE.equals(rel.getRelType()) && + ModelConstants.DEDUP.equals(rel.getSubRelType()) + && (ModelConstants.IS_MERGED_IN.equals(rel.getRelClass()) || + ModelConstants.MERGES.equals(rel.getRelClass())); + } + protected static Boolean parseECField(Field field) { if (field == null) return null; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9cc003bf6..62cbb5bff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -61,7 +61,7 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(x -> !isOpenorgs(x)); + .filter(x -> !isOpenorgsDedupRel(x)); if (log.isDebugEnabled()) { log.debug("Number of non-Openorgs relations collected: {}", simRels.count()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 2349ffebe..3cd695524 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -29,6 +31,8 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -226,9 +230,23 @@ public class SparkOpenorgsProvisionTest implements Serializable { new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService); - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); + + long relations = rels.count(); + + final ObjectMapper mapper = new ObjectMapper(); + List relTypes = rels + .map(r -> mapper.readValue(r, Relation.class)) + .map( + r -> r.getRelType() + "_" + r.getSubRelType() + "_" + r.getRelClass() + "|" + + r.getCollectedfrom().stream().map(cf -> cf.getValue()).collect(Collectors.joining(","))) + .distinct() + .collect(); + + relTypes.forEach(r -> System.out.println("relType: " + r)); + + assertEquals(2382, relations); - assertEquals(2380, relations); } @Test @@ -250,7 +268,7 @@ public class SparkOpenorgsProvisionTest implements 
Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4894, relations); + assertEquals(4896, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 index 67d491ca2..35d92089d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 @@ -2518,3 +2518,5 @@ {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::5c351d85f02db01ca291acd119f0bd78", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|opendoar____::37248e2f6987b18670dd2b8a51d6ef55", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "merges", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsParentOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": 
"organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsChildOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} From f4538f3c4c29df211b9c8eb5f2560620677ff642 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Nov 2021 11:33:10 +0100 Subject: [PATCH 17/44] cleanup --- .../dhp/oa/dedup/SparkOpenorgsProvisionTest.java | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 3cd695524..2a9f34dee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -232,20 +232,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); - long relations = rels.count(); - - final ObjectMapper mapper = new ObjectMapper(); - List relTypes = rels - .map(r -> mapper.readValue(r, Relation.class)) - .map( - r -> r.getRelType() + "_" + r.getSubRelType() + "_" + r.getRelClass() + "|" + - r.getCollectedfrom().stream().map(cf -> cf.getValue()).collect(Collectors.joining(","))) - .distinct() - .collect(); - - relTypes.forEach(r -> System.out.println("relType: " + r)); - - assertEquals(2382, relations); + assertEquals(2382, rels.count()); } From 4542a2338ba6dabdbc5f06e26e992ed8b7c26f6b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 13:44:08 +0100 Subject: [PATCH 18/44] updated site configuration to deploy on website --- dhp-build/dhp-code-style/pom.xml | 11 +++++++++-- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 7a6a32e0e..25151fb87 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -24,11 +24,18 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-build/dhp-code-style + ${dhp.site.stage.path}/dhp-build/dhp-code-style + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + @@ -47,7 +54,7 @@ UTF-8 - /tmp/dhp-site + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop \ No newline at end of file diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index fed689a06..1b39f9f0a 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -24,7 +24,7 @@ DHPSite - 
file://${dhp.site.stage.path}/site/dhp-build + ${dhp.site.stage.path}/dhp-build diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 686b89f6b..4a13b3459 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -16,7 +16,7 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-common + ${dhp.site.stage.path}/dhp-common diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 89ba2bf70..53d029467 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -18,7 +18,7 @@ DHPSite - file://${dhp.site.stage.path}/site/dhp-workflows + ${dhp.site.stage.path}/dhp-workflows diff --git a/pom.xml b/pom.xml index f7e3c6226..9e9bbaa16 100644 --- a/pom.xml +++ b/pom.xml @@ -721,7 +721,7 @@ DHPSite - file://${dhp.site.stage.path}/site/ + ${dhp.site.stage.path}/ @@ -738,7 +738,7 @@ - /tmp/dhp-site + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop UTF-8 UTF-8 3.6.0 From 75298ec44272b2589821478d1ba0be85200731b3 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:48:44 +0100 Subject: [PATCH 19/44] added site.xml to code style --- dhp-build/dhp-code-style/src/site/site.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 dhp-build/dhp-code-style/src/site/site.xml diff --git a/dhp-build/dhp-code-style/src/site/site.xml b/dhp-build/dhp-code-style/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/dhp-build/dhp-code-style/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file From 155d8bf83fdfb6fe069aa062076b95054272129a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:51:08 +0100 Subject: [PATCH 20/44] updated maven site plugin on dhp-code-style --- dhp-build/dhp-code-style/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 25151fb87..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -46,7 +46,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.9.1 From 65ebe1019b859b93a13d07f6029d19057ac22782 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 14:59:04 +0100 Subject: [PATCH 21/44] updated wagon-ssh version --- dhp-build/dhp-code-style/pom.xml | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index db0097d64..707033fd9 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -33,7 +33,7 @@ org.apache.maven.wagon wagon-ssh - 2.10 + 2.12 diff --git a/pom.xml b/pom.xml index 9e9bbaa16..12c0c9c1b 100644 --- a/pom.xml +++ b/pom.xml @@ -704,7 +704,7 @@ org.apache.maven.wagon wagon-ssh - 2.10 + 2.12 From 6110a2b984251c2a12fcb0c9e484900584f057cb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 19 Nov 2021 15:31:45 +0100 Subject: [PATCH 22/44] reverted version --- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 707033fd9..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -33,7 +33,7 @@ org.apache.maven.wagon wagon-ssh - 2.12 + 2.10 diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 1b39f9f0a..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -24,7 
+24,7 @@ DHPSite - ${dhp.site.stage.path}/dhp-build + ${dhp.site.stage.path}/dhp-build/ diff --git a/pom.xml b/pom.xml index 12c0c9c1b..9e9bbaa16 100644 --- a/pom.xml +++ b/pom.xml @@ -704,7 +704,7 @@ org.apache.maven.wagon wagon-ssh - 2.12 + 2.10 From fdb75b180e8fdb297bbbc1e4fc915133a01b1a95 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sun, 21 Nov 2021 16:35:22 +0100 Subject: [PATCH 23/44] [Cleaning] added couple of tests for DOIBOOST publications --- .../clean/GraphCleaningFunctionsTest.java | 27 +++++++++++++++++++ .../dhp/oa/graph/clean/doiboostpub.json | 1 + .../dhp/oa/graph/clean/doiboostpub2.json | 1 + 3 files changed, 29 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index aa9535ef7..64fe7749b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -12,6 +12,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -222,4 +224,29 @@ public class GraphCleaningFunctionsTest { .readLines( GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); } + + + + @Test + public void testCleanDoiBoost() throws IOException { + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + + + Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + } + + @Test + public void testCleanDoiBoost2() throws IOException { + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + + + Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) ); + + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json new file mode 100644 index 000000000..061145d0f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": 
false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-11-06T11:36:37Z"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::b0baa0eb88a5788f0b8815560d2a32f2", "subject": [], "lastupdatetimestamp": 1620353302565, "author": [{"fullname": "N. S. AGRUSS", "surname": "AGRUSS", "name": "N. S.", "rank": 1}, {"fullname": "E. Y. ROSIN", "surname": "ROSIN", "name": "E. Y.", "rank": 2}, {"fullname": "R. J. ADOLPH", "surname": "ADOLPH", "name": "R. J.", "rank": 3}, {"fullname": "N. O. FOWLER", "surname": "FOWLER", "name": "N. O.", "rank": 4}], "instance": [{"hostedby": {"key": "10|issn___print::b8cee613d4f898f8c03956d57ea69be2", "value": "Survey of Anesthesiology"}, "url": ["https://doi.org/10.1097/00132586-197308000-00003"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T02:08:22Z", "fulltext": [], "description": [], "format": [], "journal": {"issnPrinted": "0039-6206", "vol": "17", "sp": "304", "name": "Survey of Anesthesiology"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Ovid Technologies (Wolters Kluwer Health)"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1097/00132586-197308000-00003", "50|doiboost____::b0baa0eb88a5788f0b8815560d2a32f2"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "SIGNIFICANCE OF CHRONIC SINUS BRADYCARDIA IN ELDERLY PEOPLE"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json new file mode 100644 index 000000000..cf81b0286 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": 
"dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2007-08-20T08:35:04Z"}, {"qualifier": {"classid": "published-online", "classname": "published-online", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-01-01"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::4972b0ca81b96b225aed8038bb965656", "subject": [{"qualifier": {"classid": "keywords", "classname": "keywords", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "General Medicine"}], "lastupdatetimestamp": 1620381522840, "author": [{"fullname": "null VERHAMME P", "surname": "VERHAMME P", "rank": 1}], "instance": [{"hostedby": {"key": "10|issn__online::7ec728ad1ac65c60cd563a5137111125", "value": "Tijdschrift voor Geneeskunde"}, "url": ["https://doi.org/10.2143/tvg.62.1.5002364"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "dateofacceptance": {"value": "2006-01-01"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T09:58:42Z", "fulltext": [], "description": [], "format": [], "journal": {"vol": "62", "sp": "55", "issnOnline": "0371-683X", "ep": "61", "name": "Tijdschrift voor Geneeskunde"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Peeters Publishers"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.2143/tvg.62.1.5002364", "50|doiboost____::4972b0ca81b96b225aed8038bb965656"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-01-01"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Antitrombotica: nieuwe moleculen"}]} \ No newline at end of file From 35e20b0647ac97ffefddf0ca5cdc1e15488cf6d1 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 11:48:55 +0100 Subject: [PATCH 24/44] updated resolution wf: - generate a new version of the graph - changed merge from union to join --- .../resolution/SparkResolveEntities.scala | 72 +++++++++---------- .../resolution/SparkResolveRelation.scala | 12 ++-- .../graph/resolution/oozie_app/workflow.xml | 6 ++ 
.../resolution/resolve_entities_params.json | 3 +- .../resolution/resolve_relations_params.json | 3 +- .../resolution/ResolveEntitiesTest.scala | 48 ++++++++++++- 6 files changed, 97 insertions(+), 47 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 316b8afed..f8ebb6800 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -14,7 +14,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkResolveEntities { val mapper = new ObjectMapper() - val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + val entities = List(EntityType.dataset, EntityType.publication, EntityType.software, EntityType.otherresearchproduct) def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -36,25 +36,19 @@ object SparkResolveEntities { val unresolvedPath = parser.get("unresolvedPath") log.info(s"unresolvedPath -> $unresolvedPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) fs.mkdirs(new Path(workingPath)) resolveEntities(spark, workingPath, unresolvedPath) - generateResolvedEntities(spark, workingPath, graphBasePath) - - // TO BE conservative we keep the original entities in the working dir - // and save the resolved entities on the graphBasePath - //In future these lines of code should be removed - entities.foreach { - e => - fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old")) - fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) - } - -} + generateResolvedEntities(spark, workingPath, graphBasePath, targetPath) + } -def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import spark.implicits._ @@ -71,37 +65,43 @@ def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: St } - def deserializeObject(input:String, entity:EntityType ) :Result = { + def deserializeObject(input: String, entity: EntityType): Result = { - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software=> mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) - } + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) + } } - def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + def generateResolvedEntities(spark: SparkSession, workingPath: String, graphBasePath: String, targetPath:String) = { implicit val resEncoder: Encoder[Result] = 
Encoders.kryo(classOf[Result]) import spark.implicits._ - val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r)) entities.foreach { - e => + e => { + + val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r)) + + + currentEntityDataset.joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left").map(k => { + + val a = k._1 + val b = k._2 + if (b == null) + a._2 + else { + a._2.mergeFrom(b._2) + a._2 + } + }).map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$targetPath/$e") + } + - spark.read.text(s"$graphBasePath/$e").as[String] - .map(s => deserializeObject(s, e)) - .union(re) - .groupByKey(_.getId) - .reduceGroups { - (x, y) => - x.mergeFrom(y) - x - }.map(_._2) - .filter(r => r.getClass.getSimpleName.toLowerCase != "result") - .map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingPath/resolvedGraph/$e") } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index cd517dd5e..a194f2694 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -35,6 +35,9 @@ object SparkResolveRelation { val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) import spark.implicits._ @@ -80,20 +83,13 @@ object SparkResolveRelation { .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") - - // TO BE conservative we keep the original relation in the working dir - // and save the relation resolved on the graphBasePath - //In future this two line of code should be removed - - fs.rename(new Path(s"$graphBasePath/relation"), new Path(s"$workingPath/relation")) - spark.read.load(s"$workingPath/relation_resolved").as[Relation] .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) .map(r => mapper.writeValueAsString(r)) .write .option("compression", "gzip") .mode(SaveMode.Overwrite) - .text(s"$graphBasePath/relation") + .text(s"$targetPath/relation") } def extractInstanceCF(input: String): List[(String, String)] = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index ceb13c5e8..8859295bf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -8,6 +8,10 @@ unresolvedPath the path of the unresolved Entities + + targetPath + the target path after resolution + @@ -36,6 +40,7 @@ --masteryarn --graphBasePath${graphBasePath} --workingPath${workingDir} + 
--targetPath${targetPath} @@ -62,6 +67,7 @@ --graphBasePath${graphBasePath} --unresolvedPath${unresolvedPath} --workingPath${workingDir} + --targetPath${targetPath} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json index f38cc1291..67e315664 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json @@ -2,5 +2,6 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, {"paramName":"u", "paramLongName":"unresolvedPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json index 1fbe20648..66a035da5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json @@ -1,5 +1,6 @@ [ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 46bf48974..0d7350fdd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Result, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty} import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -154,19 +154,39 @@ 
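The heart of this workflow change is the union-to-join switch in SparkResolveEntities.generateResolvedEntities, shown in the hunks above. Restated as a compact, illustrative sketch for readability (not the literal project code: deserialize stands in for SparkResolveEntities.deserializeObject, spark/graphBasePath/workingPath come from the surrounding job, and Result with its mergeFrom(Result) method is assumed from dhp-schemas):

    import org.apache.spark.sql.{Dataset, Encoder, Encoders}

    implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result])
    val keyed = Encoders.tuple(Encoders.STRING, resEncoder)

    // existing graph entities and the rebuilt (resolved) entities, keyed by id
    val current: Dataset[(String, Result)] =
      spark.read.text(s"$graphBasePath/publication").as[String](Encoders.STRING)
        .map { s => val r = deserialize(s); (r.getId, r) }(keyed)
    val resolved: Dataset[(String, Result)] =
      spark.read.load(s"$workingPath/resolvedEntities").as[Result]
        .map(r => (r.getId, r))(keyed)

    // A left join keeps every existing entity: rows with no resolved match
    // pass through unchanged, matched rows are enriched via mergeFrom.
    // The previous version unioned the two datasets and reduced by id instead.
    current.joinWith(resolved, current("_1").equalTo(resolved("_1")), "left")
      .map { t =>
        if (t._2 == null) t._1._2
        else { t._1._2.mergeFrom(t._2._2); t._1._2 }
      }

The left join also guarantees that no entity of the wrong runtime type can leak into the output, which is why the old filter on getClass.getSimpleName could be dropped.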
class ResolveEntitiesTest extends Serializable { val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + var ct = pubDS.count() + var et = pubDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + + assertEquals(ct, et) + + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = datDS.count() + et = datDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = softDS.count() + et = softDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + ct = orpDS.count() + et = orpDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() + assertEquals(ct, et) + + + + + assertEquals(0, t) assertEquals(2, td) assertEquals(1, ts) @@ -178,6 +198,32 @@ class ResolveEntitiesTest extends Serializable { + @Test + def testMerge():Unit = { + + val r = new Result + r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + + val mapper = new ObjectMapper() + + val p = mapper.readValue(Source.fromInputStream(this.getClass.getResourceAsStream(s"publication")).mkString.lines.next(), classOf[Publication]) + + + r.mergeFrom(p) + + + println(mapper.writeValueAsString(r)) + + + + + + + + + } + + From 93fe8ce8b2011aa6108793ba7389eef278aa62ac Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 15:50:43 +0100 Subject: [PATCH 25/44] entity resolution: fix test --- .../dhp/oa/graph/resolution/SparkResolveEntities.scala | 5 ++--- .../dhp/oa/graph/resolution/ResolveEntitiesTest.scala | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index f8ebb6800..be217c5c3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -80,12 +80,11 @@ object SparkResolveEntities { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) import 
spark.implicits._ - val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r)) + val re: Dataset[(String, Result)] = spark.read.load(s"$workingPath/resolvedEntities").as[Result].map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) entities.foreach { e => { - val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r)) - + val currentEntityDataset: Dataset[(String, Result)] = spark.read.text(s"$graphBasePath/$e").as[String].map(s => deserializeObject(s, e)).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) currentEntityDataset.joinWith(re, currentEntityDataset("_1").equalTo(re("_1")), "left").map(k => { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 0d7350fdd..c22243f94 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -146,11 +146,11 @@ class ResolveEntitiesTest extends Serializable { implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) val m = new ObjectMapper() SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) - SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph" ) + SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph", s"$workingDir/target" ) - val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/target/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() @@ -161,21 +161,21 @@ class ResolveEntitiesTest extends Serializable { - val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/target/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() ct = datDS.count() et = datDS.filter(p => p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() assertEquals(ct, et) - val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val softDS:Dataset[Result] = spark.read.text(s"$workingDir/target/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() ct = softDS.count() et = softDS.filter(p => 
p.getTitle!= null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)).count() assertEquals(ct, et) - val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/target/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() From 483d3039d1a4e42656fff8a205f3b75d7271aed7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 15:55:24 +0100 Subject: [PATCH 26/44] entity resolution: added distcpt of missing entities in graph materialization --- .../graph/resolution/oozie_app/workflow.xml | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 8859295bf..031b5a36f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -69,10 +69,44 @@ --workingPath${workingDir} --targetPath${targetPath} - + - + + + + + + + + ${nameNode}/${graphBasePath}/organization + ${nameNode}/${targetPath}/organization + + + + + + + + ${nameNode}/${graphBasePath}/project + ${nameNode}/${targetPath}/project + + + + + + + + ${nameNode}/${graphBasePath}/datasource + ${nameNode}/${targetPath}/datasource + + + + + + + + \ No newline at end of file From a7cf277d98aee3fc6bad29661aca9b96beccdc6d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 22 Nov 2021 16:03:17 +0100 Subject: [PATCH 27/44] Datacite: Removed HostedBy Patch as described on ticket #7219, Now all the records will have hosted by Unknown Repository --- .../dhp/datacite/DataciteToOAFTransformation.scala | 14 ++++---------- .../dnetlib/dhp/datacite/DataciteToOAFTest.scala | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 6ce4920ed..1af72e8d3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -41,7 +41,7 @@ case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle case class DateType(date: Option[String], dateType: Option[String]) {} -case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} +//case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} object DataciteToOAFTransformation { @@ -90,17 +90,11 @@ object DataciteToOAFTransformation { } val mapper = new ObjectMapper() - val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, 
ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F)) val dataInfo: DataInfo = generateDataInfo("0.9") val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite") - val hostedByMap: Map[String, HostedByMapType] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: org.json4s.JValue = parse(s) - json.extract[Map[String, HostedByMapType]] - } + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) @@ -516,8 +510,8 @@ object DataciteToOAFTransformation { val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) if (client.isDefined) { - val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository) - instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name)) + + instance.setHostedby(OafMapperUtils.keyValue(generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID), ModelConstants.UNKNOWN_REPOSITORY.getValue)) instance.setCollectedfrom(DATACITE_COLLECTED_FROM) instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) instance.setAccessright(access_rights_qualifier) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index f21e9eab1..c7c6c6a92 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -36,7 +36,7 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ @Test def testMapping() :Unit = { - val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString + val record =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")).mkString From 2164a2a8898da176799bab3815244b234746eb45 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 10:54:13 +0100 Subject: [PATCH 28/44] Datacite: Code Refactor generated a general SparkApplication Scala where all the spark scala have to inherit Commented a little the Datacite transformation code --- .../AbstractScalaApplication.scala | 37 +++++ .../application/SparkScalaApplication.scala | 35 +++++ .../dhp/datacite/DataciteModelConstants.scala | 134 ++++++++++++++++++ .../DataciteToOAFTransformation.scala | 127 +++-------------- .../GenerateDataciteDatasetSpark.scala | 86 +++++++---- .../src/main/resources/log4j.properties | 3 + .../dhp/datacite/DataciteToOAFTest.scala | 57 +++++++- 7 files changed, 337 insertions(+), 142 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala
new file mode 100644
index 000000000..44dad93eb
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala
@@ -0,0 +1,37 @@
+package eu.dnetlib.dhp.application
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.slf4j.Logger
+
+abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication {
+
+  var parser: ArgumentApplicationParser = null
+
+  var spark:SparkSession = null
+
+
+  def initialize():SparkScalaApplication = {
+    parser = parseArguments(args)
+    spark = createSparkSession()
+    this
+  }
+
+  /**
+   * Utility for creating a Spark session starting from the parser
+   *
+   * @return a Spark session
+   */
+  private def createSparkSession():SparkSession = {
+    require(parser!= null)
+
+    val conf:SparkConf = new SparkConf()
+    val master = parser.get("master")
+    log.info(s"Creating Spark session: Master: $master")
+    SparkSession.builder().config(conf)
+      .appName(getClass.getSimpleName)
+      .master(master)
+      .getOrCreate()
+  }
+
+}
\ No newline at end of file
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala
new file mode 100644
index 000000000..247bacac0
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala
@@ -0,0 +1,35 @@
+package eu.dnetlib.dhp.application
+
+import scala.io.Source
+
+/**
+ * This is the main SparkApplication interface
+ * that all Spark Scala classes should inherit from
+ *
+ */
+trait SparkScalaApplication {
+  /**
+   * This is the classpath location of the json
+   * that describes all the arguments needed to run
+   */
+  val propertyPath: String
+
+  /**
+   * Utility to parse the arguments using the
+   * property json in the classpath identified by
+   * the variable propertyPath
+   *
+   * @param args the list of arguments
+   */
+  def parseArguments(args: Array[String]): ArgumentApplicationParser = {
+    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString)
+    parser.parseArgument(args)
+    parser
+  }
+
+  /**
+   * Every Spark application runs this method,
+   * where the whole logic of the Spark node is defined
+   */
+  def run(): Unit
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala
new file mode 100644
index 000000000..0685a04a3
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala
@@ -0,0 +1,134 @@
+package eu.dnetlib.dhp.datacite
+
+import eu.dnetlib.dhp.schema.common.ModelConstants
+import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue}
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
+
+import java.io.InputStream
+import java.time.format.DateTimeFormatter
+import java.util.Locale
+import java.util.regex.Pattern
+import scala.io.Source
+
+/**
+ * This class represents the data model of the input Datacite Dataset
+ * @param doi the DOI
+ * @param timestamp timestamp of the last update
+ * @param isActive whether the record is active or deleted
+ * @param json the native JSON record
+ */
+case class DataciteType(doi: String,
timestamp: Long, isActive: Boolean, json: String) {} + +/* + The following class are utility class used for the mapping from + json datacite to OAF Shema + */ +case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} + +case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} + +case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} + +case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} + +case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} + +case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} + +case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} + +case class DateType(date: Option[String], dateType: Option[String]) {} + +case class OAFRelations(relation:String, inverse:String, relType:String) + + +class DataciteModelConstants extends Serializable { + +} + +object DataciteModelConstants { + + val REL_TYPE_VALUE:String = "resultResult" + val DATE_RELATION_KEY = "RelationDate" + val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter" + val DOI_CLASS = "doi" + val SUBJ_CLASS = "keywords" + val DATACITE_NAME = "Datacite" + val dataInfo: DataInfo = dataciteDataInfo("0.9") + val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) + + val subRelTypeMapping: Map[String,OAFRelations] = Map( + ModelConstants.REFERENCES -> OAFRelations(ModelConstants.REFERENCES, ModelConstants.IS_REFERENCED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_REFERENCED_BY -> OAFRelations(ModelConstants.IS_REFERENCED_BY,ModelConstants.REFERENCES, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.SUPPLEMENT), + ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.SUPPLEMENT), + + ModelConstants.HAS_PART -> OAFRelations(ModelConstants.HAS_PART,ModelConstants.IS_PART_OF, ModelConstants.PART), + ModelConstants.IS_PART_OF -> OAFRelations(ModelConstants.IS_PART_OF,ModelConstants.HAS_PART, ModelConstants.PART), + + ModelConstants.IS_VERSION_OF-> OAFRelations(ModelConstants.IS_VERSION_OF,ModelConstants.HAS_VERSION,ModelConstants.VERSION), + ModelConstants.HAS_VERSION-> OAFRelations(ModelConstants.HAS_VERSION,ModelConstants.IS_VERSION_OF,ModelConstants.VERSION), + + ModelConstants.IS_IDENTICAL_TO -> OAFRelations(ModelConstants.IS_IDENTICAL_TO,ModelConstants.IS_IDENTICAL_TO, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_CONTINUED_BY -> OAFRelations(ModelConstants.IS_CONTINUED_BY,ModelConstants.CONTINUES, ModelConstants.RELATIONSHIP), + ModelConstants.CONTINUES -> OAFRelations(ModelConstants.CONTINUES,ModelConstants.IS_CONTINUED_BY, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_NEW_VERSION_OF-> OAFRelations(ModelConstants.IS_NEW_VERSION_OF,ModelConstants.IS_PREVIOUS_VERSION_OF, ModelConstants.VERSION), + ModelConstants.IS_PREVIOUS_VERSION_OF 
->OAFRelations(ModelConstants.IS_PREVIOUS_VERSION_OF,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), + + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), + ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), + + ModelConstants.IS_SOURCE_OF -> OAFRelations(ModelConstants.IS_SOURCE_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), + ModelConstants.IS_DERIVED_FROM -> OAFRelations(ModelConstants.IS_DERIVED_FROM,ModelConstants.IS_SOURCE_OF, ModelConstants.VERSION), + + ModelConstants.CITES -> OAFRelations(ModelConstants.CITES,ModelConstants.IS_CITED_BY, ModelConstants.CITATION), + ModelConstants.IS_CITED_BY -> OAFRelations(ModelConstants.IS_CITED_BY,ModelConstants.CITES, ModelConstants.CITATION), + + ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(ModelConstants.IS_VARIANT_FORM_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION), + ModelConstants.IS_OBSOLETED_BY -> OAFRelations(ModelConstants.IS_OBSOLETED_BY,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION), + + ModelConstants.REVIEWS -> OAFRelations(ModelConstants.REVIEWS,ModelConstants.IS_REVIEWED_BY, ModelConstants.REVIEW), + ModelConstants.IS_REVIEWED_BY -> OAFRelations(ModelConstants.IS_REVIEWED_BY,ModelConstants.REVIEWS, ModelConstants.REVIEW), + + ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP), + + ModelConstants.COMPILES -> OAFRelations(ModelConstants.COMPILES,ModelConstants.IS_COMPILED_BY, ModelConstants.RELATIONSHIP), + ModelConstants.IS_COMPILED_BY -> OAFRelations(ModelConstants.IS_COMPILED_BY,ModelConstants.COMPILES, ModelConstants.RELATIONSHIP) + ) + + + val datacite_filter: List[String] = { + val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH) + require(stream!= null) + Source.fromInputStream(stream).getLines().toList + } + + + def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo(false,null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, trust) + + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) + val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) + + val funder_regex: List[(Pattern, String)] = List( + (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), + (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") + + ) + + val Date_regex: List[Pattern] = List( + //Y-M-D + Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), + //M-D-Y + Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), + //D-M-Y + 
Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), + //Y + Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) + ) + + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 1af72e8d3..6b4deef0a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.datacite.DataciteModelConstants._ import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} @@ -12,115 +13,30 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse -import java.nio.charset.CodingErrorAction import java.text.SimpleDateFormat import java.time.LocalDate import java.time.chrono.ThaiBuddhistDate import java.time.format.DateTimeFormatter -import java.util.regex.Pattern import java.util.{Date, Locale} import scala.collection.JavaConverters._ -import scala.io.{Codec, Source} -import scala.language.postfixOps -case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} - -case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} - -case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} - -case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} - -case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} - -case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} - -case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} - -case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} - -case class DateType(date: Option[String], dateType: Option[String]) {} - -//case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} object DataciteToOAFTransformation { - val REL_TYPE_VALUE:String = "resultResult" - val DATE_RELATION_KEY = "RelationDate" - - val subRelTypeMapping: Map[String,(String,String)] = Map( - "References" ->("IsReferencedBy","relationship"), - "IsSupplementTo" ->("IsSupplementedBy","supplement"), - "IsPartOf" ->("HasPart","part"), - 
"HasPart" ->("IsPartOf","part"), - "IsVersionOf" ->("HasVersion","version"), - "HasVersion" ->("IsVersionOf","version"), - "IsIdenticalTo" ->("IsIdenticalTo","relationship"), - "IsPreviousVersionOf" ->("IsNewVersionOf","version"), - "IsContinuedBy" ->("Continues","relationship"), - "Continues" ->("IsContinuedBy","relationship"), - "IsNewVersionOf" ->("IsPreviousVersionOf","version"), - "IsSupplementedBy" ->("IsSupplementTo","supplement"), - "IsDocumentedBy" ->("Documents","relationship"), - "IsSourceOf" ->("IsDerivedFrom","relationship"), - "Cites" ->("IsCitedBy","citation"), - "IsCitedBy" ->("Cites","citation"), - "IsDerivedFrom" ->("IsSourceOf","relationship"), - "IsVariantFormOf" ->("IsDerivedFrom","version"), - "IsReferencedBy" ->("References","relationship"), - "IsObsoletedBy" ->("IsNewVersionOf","version"), - "Reviews" ->("IsReviewedBy","review"), - "Documents" ->("IsDocumentedBy","relationship"), - "IsCompiledBy" ->("Compiles","relationship"), - "Compiles" ->("IsCompiledBy","relationship"), - "IsReviewedBy" ->("Reviews","review") - ) - - implicit val codec: Codec = Codec("UTF-8") - codec.onMalformedInput(CodingErrorAction.REPLACE) - codec.onUnmappableCharacter(CodingErrorAction.REPLACE) - - val DOI_CLASS = "doi" - val SUBJ_CLASS = "keywords" - - - val j_filter: List[String] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString - s.lines.toList - } - val mapper = new ObjectMapper() - val dataInfo: DataInfo = generateDataInfo("0.9") - val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite") - - - val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) - val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) - - val funder_regex: List[(Pattern, String)] = List( - (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), - (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") - - ) - - val Date_regex: List[Pattern] = List( - //Y-M-D - Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), - //M-D-Y - Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), - //D-M-Y - Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), - //Y - Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) - ) - - - def filter_json(json: String): Boolean = { - j_filter.exists(f => json.contains(f)) + /** + * This method should skip record if json contains invalid text + * defined in gile datacite_filter + * @param json + * @return True if the record should be skipped + */ + def skip_record(json: String): Boolean = { + datacite_filter.exists(f => 
json.contains(f)) } + @deprecated("this method will be removed", "dhp") def toActionSet(item: Oaf): (String, String) = { val mapper = new ObjectMapper() @@ -200,6 +116,8 @@ object DataciteToOAFTransformation { case _: Throwable => "" } } + + def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -318,11 +236,7 @@ object DataciteToOAFTransformation { val p = match_pattern.get._2 val grantId = m.matcher(awardUri).replaceAll("$2") val targetId = s"$p${DHPUtils.md5(grantId)}" - List( - generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) -// REMOVED INVERSE RELATION since there is a specific method that should generate later -// generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo) - ) + List( generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) ) } else List() @@ -331,7 +245,7 @@ object DataciteToOAFTransformation { def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = { - if (filter_json(input)) + if (skip_record(input)) return List() implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -565,7 +479,7 @@ object DataciteToOAFTransformation { rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setDataInfo(dataInfo) - val subRelType = subRelTypeMapping(r.relationType)._2 + val subRelType = subRelTypeMapping(r.relationType).relType rel.setRelType(REL_TYPE_VALUE) rel.setSubRelType(subRelType) rel.setRelClass(r.relationType) @@ -579,18 +493,9 @@ object DataciteToOAFTransformation { rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel - }).toList + }) } - def generateDataInfo(trust: String): DataInfo = { - val di = new DataInfo - di.setDeletedbyinference(false) - di.setInferred(false) - di.setInvisible(false) - di.setTrust(trust) - di.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER) - di - } def generateDSId(input: String): String = { val b = StringUtils.substringBefore(input, "::") diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index a63627d1c..e1607ee9c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -1,64 +1,94 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.application.AbstractScalaApplication import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH -import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile import 
eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import scala.io.Source -object GenerateDataciteDatasetSpark { +class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) { + /** + * Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { - val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) - - def main(args: Array[String]): Unit = { - val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) - parser.parseArgument(args) - val master = parser.get("master") val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks")) + log.info(s"exportLinks is '$exportLinks'") val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) - val spark: SparkSession = SparkSession.builder().config(conf) - .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName) - .master(master) - .getOrCreate() + require(vocabularies != null) + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'") + + val mapper = new ObjectMapper() + val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info(s"outputBasePath is '$outputBasePath'") + val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" + log.info(s"targetPath is '$targetPath'") + + generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark) + + reportTotalSize(targetPath, outputBasePath) + } + + + /** + * For working with MDStore we need to store in a file on hdfs the size of + * the current dataset + * @param targetPath + * @param outputBasePath + */ + def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = { + val total_items = spark.read.load(targetPath).count() + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) + } + + /** + * Generate the transformed and cleaned OAF Dataset from the native one + + * @param sourcePath sourcePath of the native Dataset in format JSON/Datacite + * @param exportLinks If true it generates unresolved links + * @param vocabularies vocabularies for cleaning + * @param targetPath the targetPath of the result Dataset + */ + def generateDataciteDataset(sourcePath: String, exportLinks: Boolean, vocabularies: VocabularyGroup, targetPath: String, spark:SparkSession):Unit = { + require(spark!= null) import spark.implicits._ implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - - val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") - val mapper = new ObjectMapper() - val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) - val outputBasePath = cleanedMdStoreVersion.getHdfsPath - - log.info("outputBasePath: {}", 
outputBasePath) - val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" - spark.read.load(sourcePath).as[DataciteType] .filter(d => d.isActive) .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) .filter(d => d != null) .flatMap(i => fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) + } - val total_items = spark.read.load(targetPath).as[Oaf].count() - writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) +} + + +object GenerateDataciteDatasetSpark { + + val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) + + def main(args: Array[String]): Unit = { + new GenerateDataciteDatasetSpark("/eu/dnetlib/dhp/datacite/generate_dataset_params.json", args, log).initialize().run() } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties index 63cba917e..81458d1f7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties +++ b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties @@ -7,3 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +log4j.logger.org.apache.spark=FATAL +log4j.logger.org.spark_project=FATAL diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index c7c6c6a92..50fe73d9a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -4,21 +4,29 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf +import org.apache.spark.SparkConf +import org.apache.spark.sql.functions.{col, count} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import org.slf4j.{Logger, LoggerFactory} +import java.nio.file.{Files, Path} import java.text.SimpleDateFormat import java.util.Locale import scala.io.Source - +import org.junit.jupiter.api.Assertions._ @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ + var workingDir:Path= null + val log: Logger = LoggerFactory.getLogger(getClass) @BeforeEach def setUp() :Unit = { + workingDir= Files.createTempDirectory(getClass.getSimpleName) super.setUpVocabulary() } @@ -31,6 +39,51 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ println(dt.getTime) + } + + + @Test + def testConvert(): Unit = { + + + val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath + + val conf = new SparkConf() + val spark:SparkSession = SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master("local[*]") + .getOrCreate() + + + + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + val instance = new GenerateDataciteDatasetSpark(null, null, log) + val targetPath = s"$workingDir/result" + + 
From 028a8acad8b392673c9c94e49f15dca1c51b279a Mon Sep 17 00:00:00 2001
From: Sandro La Bruzzo
Date: Thu, 25 Nov 2021 10:54:47 +0100
Subject: [PATCH 29/44] add test resources

---
 .../datacite/dataset/part-00000.parquet          | Bin 0 -> 132914 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet

diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ee59c325ea304b3ba8511b196a9c180faba78de7
GIT binary patch
literal 132914
[132914 bytes of base85-encoded parquet data omitted]
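The parquet file added here is the sample that testConvert above loads and converts, asserting that it contains exactly 100 native Datacite records. A throwaway local SparkSession is enough to inspect such a resource; in this sketch the object name and the relative path are illustrative.

    import org.apache.spark.sql.SparkSession

    // Sketch: inspect the bundled parquet sample with a local SparkSession.
    object InspectDataciteSample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("InspectDataciteSample")
          .master("local[*]")
          .getOrCreate()

        val ds = spark.read.load(
          "dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset")

        println(s"records: ${ds.count()}") // testConvert asserts this is exactly 100
        ds.printSchema()

        spark.stop()
      }
    }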
zR3hzwniHsr=KYTd56BW$WXeLQ5WJbGFYiRCR-~=&pT;Gcl+5w;5Fi-VInb9DXR!T( zqO~*Zq0eMVLhYPGDMA}T@B_vO6pUyuvrGgxtP~t#00bk!OnEYY_B5SBnGM>DPDt*8 zLV6kXW|7t0AQk0@>E&)FlIO;uWvx({#iZr-q)&rYVc z!<4tsjr`cJ%>R6x`<1c%+V%jyNJcFx?j6+3cy6AZgzS4&WuXZ&l&a`*+3-GX4szxC z`Z&zFh4he;EGY;}E&E_vxt7Cl##^9EyX}Q`!`1SE!QOpSHYW1Sq z5n4#vouCbN_J_hAScoLOVLH7Sornmx+mC>_iwP=xCr`S~s}fT}E3o`R7l#jNwBvw< zsq_jpi5f2ObgK?CKIy4pmoAq~O?{60B7sbGzDUT_@N3crrjU7oeG9j0=N_VH?T>WQ zMKbgl7j{QDdOKIFLdws2BFh_b7qCgC8ZqwuF z4_}^0QR&R1*K4#`X~SE}5gn>j#ayNsa{?|riivGlGjoTuRoK71(3!483j@o}K|zF< z7cl}#R|h!>XzN>bN^ouUGI%k)mxxe~IRre!hv zDZQ72Ep$ak4?Q?pnocv8Whwa^62w~xJ0%k{OvK5I2Lz7nHr$_N+*1{{1svHn9qBRj z@`#Laeln4<8nbm}UyDl)A0j!lbP>xr^Q3QjCz2rlc~80@p@)TN-rk2 z=m3O=CuHx@1`eYb+-+|MRX^aqTZ^os8=1`E_k|z>Q&sVA{6{{4ZkfMJFjay zuQT3?Pt1?%=qbDT78SfF0c-psb{lD^G4gmu_jyF|jeKu^O9go^-H=$nFfl!Vq;m*O zTZ@aB=iOU%3}^UA#lM)&eXMe=9=>n}#_Mx29T(FjZHUHne1y`Kv6dS>Y3e;Je~(g^ z=E+b?)2vx)@3E3o8QJRW-|sweAD5wCN;0gs=#pMHRcG7PaEBno9NFC{xvhCuQFDv# ziClH&IMsPYth2K%!rh@On_MtvSKICC^rm3PiZBYk(TMj$j9azH^w6S~giTOz zs175NGqA@QB?xHh8xpX^0y;36uCWq_f0)RI`rDbg+HgTP0)?q+5sVAh2?AtjVU$c- znG~Tdh$$B=V9MLwYpW&= zw6-7-B#%2^AcH>9@Eec9wzP%%j7+lkMmUI?`x!N#kw_=x^H;Z+-u_^45;HBZ(Z0yZ ztQV@O>w(ZlxDwtl3_;bc=F5e1@_jj z;taQ_yWQ{t5rrTKJ55iP;rr586BWQ0_0bL4vbjyQOe5*7 z|6roKp)nmLAXgEzqz;!s-EV($BJDH~p;i1_Pge+foG2h!=khmN5W*9_7|hGf)z?DC zh(e2)GKUc)R3Dw5@3b;acGJ^jm9+VVbwpAY9hsPeBODTado6Zb zS6^-ymcM<Pk}MQcYX0A;fp# z8dwW^!%!4J!uBKzbz8wYY`Or-bu$Qrt+dj|bh_tDrO8a<+WAsac3~Zn&549)VaJ^H zM47s4BN_N4zbrV(^Pxg00R0#8yEU((4oqK9p_Qp>upphe^q@jw3(y_yYRKPBk2%WY;Yzw(79F4?5;S68;s zD;JyR%MC$I_MLEC^RFgKf@Wi)k6SF)o2$D}lxyHhc^Wa*AC$;!RS+IW-e+ z_NigZZLaqFZKhSkJhXHsUEE;(A>TGF3CR)&(l%Vz1v5tnPwM#x%?flq!`@k`*|sHo zdnUElr>11$NzIXPTYDG-+s<;><5Dc{VnP4&x(x3I%nfN0I1C=q zB}fM@Kddm{NBMhcS(}Ak!+G~wXY=MGT6PO8+a8`fcgk-#<+rj{L)4U{vA0;(-%dZw zq&kXWEf2)dLi!HX=A*%wKi~JbwO)nZwZiGiD*`uaqLRPa!oF`Fe5 zsbpq=-tMQj$;BW{$d01SJ=S027r!nA%kfRArlIUDsl?pECpFL>C%sT)cs*I~??kOS z6d2~1erHrkJ7(}kCAu`kDv5UTwq;b8Beiwyu|A}9u9!&b*evHR;BTcq|E)w(?``r@ zWg3ZXxDqW0$O*ccO?vRCL52wo7c-9yTrIN9w}I^H-;NG8Q;QNIZfo5T_<$RZzI=JK ze`v!!xTJfs3KZo4l+!2kx8FMnFS?tsb!!ih%w}{?Fp26Dy#4ff?S{Juh~xcT^rm@= zibyDO^K2Oisk;eV?cYt(YoSQ;b_Ng+ZT+Xi`kiCL2vOAb_x96hy=-NmduQ?l(gbwZzGB_udk8*P)7^u=qEQf{(76M;5tP${R;6sV|d5L+YeA z)brc&=~>y%GC5O7o2rS!QzdUS$xpmJT9s{*^OtD5C(F&FTKgKUd9{}6Wc<(k@XzKq zv^wE{Yo6gr89i3zpDm}k@$SWQ&L)#0>E)jF)yx)}3LR)QO--7!K|ZufZrqQ?Ud@>n z;x+6apig89=!yzrBx%%`p2l1YH|yj%+~cX-7M3z{Zm-Of95LV5qHZ0`EwGQBZLyzi zQQKMvZ3VPFVyH6y+(4S>!8TLs*cUBNebIs`Z4Yllc&w=wu|O-3ZwS$J8_o$FXhZ|i zhq7s2S{};FSbB*l8U$_gpPOlJ(V9W#xEgoL=pZ*>p_9^hbyzEMX@w^Ga_2Om8CiI- zAnNHt7{{dp{d9UX^6O^026Pnu%wptgEF>VUP>-j9)0I=T5NDyc!+%}O z9OHXE{GKA|yU2rN?xYc)Y|rm>q4MzW_Uq$DK3#Mlzu7`o zaDEG2F|``nW3h+LJSOQ5`#jhZ_i(26VglCa0tYVL^zy1{SWuP}Ov_~^p+?K9h?7;R z8z5wA2xpOcR*zPI-n7GgnBn8Hq_2~pNlp9N!3OTkRMISsh+meiNME}EJ{e&q)ZEOZ z)$qh1K7CratE7EeF8*A*0EV(0m)*5U$PBi}vZU+tFzF44(11h2R$D*Ky|nm8EbrPF zFM7Aquw-b64XLN2SyUjMN($LZw57a4cE5^oqYC!sd})Rr?GQ1NOQWxz&QK;ML*LA_ zkOaUTdb9cq_7_7^SxF?0j%L&KO(Xkdn7PrBGOj2?($nvGU;>GCt)sy#4V&kl z-HBN?btMcvt|ocLX4zN267$)-v7_pXk5`YXF^@jMrikI^`NbZ<&oZ2|GqS&8H4r;* zJ&LIX3a54_F>Yr`gOY_TEriAmL`u+Y{e5N-c;JwLW?3}APo6?Qn1Oo9jCmZ$E&n<%kk?exGAzb0ob1 zS~mp)^$QF0Vm*-BghbjWk`Fm+C7n=4C2np(8J6;>hM?nqA_j9nmvr*!j{T1#Wb@1u z^r3Xr5F_pMKH12B^lf75Zfk@KR8WVEUQ&}Hi0O(#nT_tBcbw470t(S%kS55^Hgeap z(neWpSg^PABp(OJt}WGu@WiGUN)K{RP)^rc2_#n6$!oJz%mHe+5`00_~RkE(Lp1%#Hq_y=L>bS0;dI$9XJK0DFJKB*y)h+*8P{D z=gKS}y~@;t!Gtcj0;Y%QiqilAoN zu_b*RrzY(&t&>3&M(?STM8a8S+-^{yR^V!i@jv9Fu0@Ad0xWZ|iZ;t-$Y!GMRuZh4 z9(OFg(IxTO=O-}CW=@dP78j1#{Ae8Rf~pOmx3z{NecS_b^fV(~+_KvFa|kXj&R@&C 
zYQVnQQndGlEZp%EDR**vy?b&4>; zH~Ba7kNgcCP)WMdpSoCqWP=b_7q4gvM#7^5-Y#`BPLpn~u~IE$`Wu=!&my{BYj~i| zJTpTtHWx)SRPzQBLu()xUFM!#+^RFI7l&vpxa{lV4lL|_PFTh*U@)bDAz7cUYeUtobTjjd-}+^ z6%NA+2bzW}9554l3Pft^W7n`x*Q6_WV+Mj^4PI_VPPQ$DRb%=&`XBR&WGO0F^r}pL zLo)rwLEq3~IJ`_t4{PZ>JI$W#mBTJtvzTI1R_skcQtG(fq0|Lh}7(m*a*y#3O$ zdBi$fM|Vx}^l0h%EV95WJoUQc1p~HJ_F4k-ZxXbApWru3EtJ~t=t{j$o0&o z`l_XxIwoXUI4Uvzea(PO7j{Y`)(@3z*X186c~u_M-}b_yz*XGeO!OO*bRUyvP?VYW z)a*o+gg?=Z`?dKS-nqnCGvKr??jeHmCC(@^Bp=G91>Dl5*N`gSSBE<0i7oVFFcRtQ z@yw;9*L&7pW~NO}e4DSRt&E$n2Yl50LU(RQG+;-r7OIfhW_&{`n?m~e^{w2=#O%Q- znPzyC^NNr}Ts9cU^IpTP6S88akC<9>r;<9h8Rq9hm~GZ7!EX}(;IvBh5=ai07{niN zOUX^$=$vh={&lR#$rU-RrHGu)KAFn@vD*4HZ~BZZU_N4Uy*Ek!fi}X-a5qFw+$~mn zFKRTbU|QjnC9@UIO;a>w=YGtVRRvbiCNFah`uF@p10^qKDARUsCraPB>xirlRLjq& z)5{lOUd9Z#Co{^)Oww8tL%fXl+!jKr=(^<0?vSY8hH;~B&~RuG*l|kB)@>xUa&2oy z{j3R|wv6;5EK?B(u;#XjHNJwekE-!l{86{NtA^|4|Q`Ym1E$Jwz3J(qzE&6f~88KCoIMs2 zGlJ&*z0Q2iRGO*f?_!mvk>3rQSlWNP*$7_DZQJHxiYlyQ@ zqNnFl%v-g@k-en`wS}q6hpwtASw_qkYMA@oROgTBoPFv1RW)iD?-^r@Yjv|b<={yOHdh8>aN%}gyRmSoc5ggE6)I!%uWf_(-p@58K`xs?8XkQNU(m&%y=o(|b#PuC!^w(f;&yOqJEDPtm{ z>xiZe$;39uoq1{>2;MOO0`2!ql$h(-$z?B^v6Q^Rs+>N0r>}A(2Ch)irN;9aP7afW z;_&--i1e*OppK9Qwr0nqnvsc5tV~23(%wZMFP0(ciEhXwIb)xAqMvx^J#PB428PbG zWLm0$Sf&YqvbZgVVsu9u-=F~lwTE*h=*Ow=9fS1YOZapjUEN_g~IdxR~^0T`d)O3P32M(0H<~)>=3p`G1=QaG-3MNFl=&oq} zS`U5g=9f|4G7os)er54m4~BB?;9WdXGo!D(ZrrADzd4Z)$G`qIeM8mOKx*Ed$d)?a zpU6J1**Qr|nVo1_K<|Dro!NkPF?VR!nI*>CRcHUk{OfFs z;T;vd$v<@GAZ?lIEFg5FZ{!H)W!ny?YeCKdHpk*%XE1MNqwr^|$2~h!JtBw-vwV(Q zMb(SxZ5nz^>A7N(=AVY!tu;TZhRX~YQ_sx9dgF1cNn8E9$!5OXIy7&p{Yw?#Uds9G zPQ!mxn&?O}?oRis_UmK`Ygh7-eC|b6$(IaD4$sQwzo0T1&v>mz zRm>en4swsH7><5lv#6Yz>&%g|DW|4V-69 zRv|l@k2nd)>TKh6h z@}Itu@8g&+&)!UgGqw>!E>Y|idU zBbRa$5^R@vi@ArCfvd+o$_PAa;YR}Ltr2AWA#)S6_BU>)4|p-%J3h=k90F%JfU0(D zB&ak|SDGI7UR>ig)+yDXo6|;DhBHMue-xc{)RdeF88ihQ!%k+Yv33o;%gC((k~9-q zHb|$1aSGPi8c}>%OEVf2aP7osTBBcR03Qd*egUx75)Enk(XN@1+O=Q*O&GcuT8m8_-_nDa`4P^TPK!@@Q zxy#Df>6!z?%V~qqEY=4w=54%-9@eDH68o>g$U?fiN0ASsB)#`eMv27=J=-D?2I-T@ zlI|YvYoEF;za1h2{YD~;UCYR9Ek5Rq-Yt~-8s|4~vK!e0#qRB&5*ZZs+`1aP)u(!> zcVpqDJ0cPHw`iRq$#*}uJB8WJb{1HhunNh zvZl!Vl41B&zu0(=kF*nRhQ%dfZJ9kOS%tW(#+dFoHCVMk$|9mIA>%ejO z@&mNMNJosKn>)&(#nSgzj(ue=GEhenq)YhYtohjlzLLHCBMrcbCYV{`t&MWiWe2Y@ z&e>1<PnUuVqnXS6+rhx_E78|0o=1GneV%%%1iU?6CA1HF9bJ^CVh zUZK69VEwFz?v$rPj)8>4cEsDe_cCxtzH)`AVz`)Y?`;_U672JQCqR2H=57)>uQILK zS`%IS7)ffJJ;FUH4>Tqvo8B6_7HPd?mZ=UIZ=*3^(0G;1aI4%0O(-qa)Aiy`ABuxKPi&!#~7n;jHMvs+Ia%vxwUr&*qx0E^)i`?pDT8RpIV#sdUhz(Ex|=Vjd@ zk|Ff-1Fcw&4bmGn@}^2iXE)9zog-(;wkIodn4}VhDOtOp)RonfXd2^wm_eS-5TfJP zv6{UY;b3TR7~fVTncO3cJ%iNmC3~+RnFu3}AQ{fv9q2eC;o6fmTS+C8fRnTx#myh{$)y9{KV4hC7u;Z=7i|9)*Xo?i@Rqs zogZ&_Nx6I}1dR79B#FaOA;>2VF!@4`M>Y^f`d24eR%p z%u_xwRj2m!cLxU8)zyOZqgM+g~b3gRLrT*oin?kvb>!6m+@N# z?bN`K(Mu0GAQld9dh~!pbMW|PjgoFmRshxIfbLU*ldEaj*|&y;)(qX+yiUWO2IiYr z6-s(GSJv#z0-GlK+EL^Z)W04 zl8IxXaX{`pTJ|ZcES2gHO_$q$WYR+e-MItHd)k9RW~X$fy;H8a(()g6eRsN?F(0qW z{**OvI&UL$n5;UzXKZ7`cN-h%gLliYW4ZfU=_1vxU(>=F#d+G?rQtl>NZb!y$I^7I zbmg`;)Z>h6I?Ybo)VO@GgCBjW4gEtikl);5O5et^3%v z6Y#{8{mgir?3hQ(d9+&OxWcncZyJ(R1oq5yKR;13nfU~8_I%t9FMmYp{Ad!h=*;HF zd}QS9n(~QEV&J}X@JD9%!$Vky`bjQ}9=>Dh2{T=~eRH`^BBmd*@I=z{4pLFGWG0!V zv5cu1`g9!`WP(EhU0Y_H`_oH7v9)*NnE%wNqsP=L+#j*7d^sP(;0@i)Ks_x}5Sri0M5r)hG|bDsOY zuj_mLoA}hTN!h{&;5zCJ(Udh9O!XQ&`FbxMTb&6t54@ih;r3E!c;sE2#ZP#hwIqB34S_%Plz(7-aF%(O2h$5<>;c?PXV>DZac{JxW5MSLy9JQu3O87JVwmRLMNg zd%86^59-4Mj~C$$X73ig#lm5G#XwDU;2k262I2iwGE8SC;A%0n_ayh+j zpKx9J&;m;3g4}%H8G7*OB>DW>ztHl7^kHf+OgG#|X)+(3x@zn78O23&(BrT)DuPAh z8E$;kX1KGqK{+l~LVtWBGx-DNZSmm2ti>BND)Eq>Dbc^H8`tf`M^v{{)*7O#k23!W 
z-7s`cSCKrKcp3jAe$~@Eg!RQFFT6xP*YZ@NX=59!T2w~(PmQ{=9n%KQwO8rRTgKZm z`L;~w{fU|LQLkW8gV&$8&Y+Enfh3Gs+7y^D8fS}D)?A}Mi+BrE;&fu_Btd_XO3_$V zW8{BL6AEUj>ah*zrFR#}wJbdC)OmvsZ~=@}ItH<0MLR4=E2|a3Wx-tP>S4n{al_Lu zw0^~S3lg&nKt4nSxp!>}_hmt%9-rw9_KD$v{w_=#TM80SzO6qi)MS8B2{{R+|@3QArG}Pci%+R~Q ztZ$^p1&iIFm|b1jNdNc}U-ulnkZpREKJJ2gNOoln|9q#s#QG{n=6IQ>Ik%p-F?Rzr z!dIKsN2#rh9k311DW$UEJlbD6D`xBKv?2FoSzcEUI3hhZI@_jy^Exm2X<*^CcZ^k{4+=LgP)sS#o<=A6D*)s>$e;) z=38yWraAt0vG}RUG*cAr+V8x7@;fU{=C)rsF(`tOu}2qh{9^t#wenN>mCV?w7lT3m zjCg)?&^?+0uhD(eWtB}?Pp(_6^=}~FRF1f*?sY&GJe8xVoD5k%?~THB7HZLP z*z$`p_!*rajGF40Pb`lqQ4|He^W(Fo>IywoRMRZjHo>M?ZE~??p+Wj<1eLCNIg68) zzi4u|M~&st6~Xpy7?&jrS{vPOOv^5-EYC6K8R>LZ8dzopZ zm6=Ar*i6ekR5=gWGGiVri7I1R-?Bmai6DHG!qg~cL%u<`C*i&}qYc3}0n?&y2@4-0 z-nVV|6GD+3YG?E1i`j8|*E}R#b=3|Nia{bgD0GC)M=(4iXW5Vf8au z@f;&E`rxQ(BW)`}k6#aOc}>pMENFA|(IjI_U(YfqOz@A}duh!GKF8_dbRdHcx1wEL3L}81+?qEw(KU5Jiz#!%`(XM73KtzSoxk1a1adSF?Ad~c{mjFW z^+};qBklBqUdCq0Z!11{2jb!#PX1-90@fpssNsa&36@c=;uuG7j@Xtn9rT+VZ1lFi z;4K~?hDpW5m=$mS3zwBve9}A7DwWmu@cMhu5IQ-JEB?q!SI#q{z3K6TSm3bi%xn8M zH3`D?xY z{7qShWp>fOe^T2#wq|=ph|rcSWU~wky=O{h68Ue=PNG=c@3QmDhmb7cs6eEgtTLwV zm35TxZ)KL0l7>D4cF$dS(Ah_Vr-tTg7&CG(#yOkF*%U2v8f`lc$O0X-<0eHpd<{qc zR$j|uAx}2(J@NV+n;@Urb}XMXjZgZ^kx75i;vB-yMi0p zY+b=UE+Up=?3SUUvrT)gYdHN43N6lO8#>4QqQih$9$k}MYjJa-KQkrcmL%SiRMo;# zH(PZz3yJK#(Z}L8T+3DEFwSip=w4_V0x4Xu_V)NM@8b1$@#E)BwdYNg=*17G$Q5Ao zUpG>4mo%KpVw55@@oK({7&<8isZ1~7kPbp_03 zg2GTCXZST#)1%(N?@%pG3dmx?`?jXzPvQ1Z#eL}(S@QVtd7a1SVS*7|C^H85o6Qw3 zb{>9yd~f;>d(##5-F*Z6cWL@Syy4|2BAaq(;3{j#M~~HyPOB(iOH(w4?Q2OF|J+tU z{r)CQP0)W4kk8b=l5|pqTgAUQ`kg26^@R=T;Z$COblu>=ob&#V?(JeByF~5cKX*{cgDtftg?LOx* z!mlmTxllp`N5@}`kGvSq+*X^!LAHBcu2`2|6m{MikD;CxsB5=t?IZk+D@OZFt>@y; z^QI``bK#~v=h2wv>aSuKZQ{*G;#IAKQEX@an1+hAae+1zjzBXvz|X%1>#;jt5s5(F znXpS0LU2vjnWx4(c%g$2Hf&Ah708%$)4dy5r_)cLjalwghrfw{9|*Kh5j4BHXKkUO?es7lfhkZ*wbd9I8G@X@pLU}`is+iB9WHmp94a%8Ru-6Ju zeid7n{5AMD>SNW8ktv89d2rJlHZVHWJcThIjT#oSJNFeqpm9B-j+npF*VL5>>V1ai zGRF58;Ul`g2yV1G3`e`^t7mwOi26@1AniPtp#STb>4@n$^PhAo4^UZoqsbZx0}CeW zd)yEl9V^`aY8foYnn z8pT?1;fAN=wQk1wgj@m_LgVbscdac>sFJX{djCFp-7~ZznQpC51E&OJ zz^Xp_VJr=v|Jj3R|I$Ye5SzK$PcHF2XDbfZGydA!>(lorucvi763#=|C z*>eBu_*4*HcXY%Cz&~^3wMWNyZ;R{u9 zB!u?QKsbiO8#{w&g+t#1hhoGCMoCWx7$^|A1ML+az`#gi963&vHZ9&JDNlkifs(6KH3HJp%iOIhD=RhUs)bp2 zZjeeCp?(Oj3P+Fva0KiE@PUkYGR!IxlkYce@ZmKMA@K!f=m|D0(|;V+xQ-x{b9e_D(3Aeg=&cF&F` zvtQzo=bC@V)#6o`aJ430(p` zwoUR^r~QtlZE$Q=7D6W(9jNOpZO#uQr&I0?(L}Vr)4r9D!8nMg!mJ~)@}*3CmS1Xw z!))wR8fv~wlR$g`H+E#bxGt9{CV;fNOkbzlyp5RL#A1WYyz&s57*jGUaaV@J0&@~( zmmDFJG1KmxEt#OabPvR;@!aJnxWr7nG{1hkICbGvF2zPpOg^mAb{s_nsV=g zUK}Vk0Yx9`L-uwa_{BiL+Vjf0(BnNbKT$qQib)G(DkONiJfF+-2 zBS8(Zk#Gv&+vmc)H*9Pyxt6HS>#ifov(9ZHZg>GO@xrkqSPNDZD;dtP?i!*jJU4>v z9o%=OcpzcmML^17cd&g`Xhm!pJXe?1LQsGf-$dHn1bzu!1G{$N+M^64DI?R*wS`?=k(&L`e$WXxd_I(zrob}dfeex$FT znp)itAbxuk0NkOaonm_g<#lINd4|~bVPet0W2%H(E>F?vPm%g%{yZO26Fj&WfQ-v8 z0Ak&47F}}XEH=XSJpHpdFXUIRXsX^dea~o9;(PmTQ0hs>u~Fzn2CgnCazo+I|{;l|1t*p$%||A1;S7 z!tiPW3)d9g$1&O(PBVe%Z?7;bkb+aI@PUx$lgNv1=X7jd>?60Q7m{XuoYGXlqzKn4 zRGd-9woM?DU--#__q?}y??$& z{x@{@#!r!|Q)GL)yUpck^LTNRO5s4hHm?mm&o-MUKj`$@gCT^;JI&0YTBkB+4P%Hr zM_Amr47V!k&Gk+88mFrf3m4WfN4`hWEe0-1tlxP+7zYDM8bGk2{buxfB>YvgOfLyn z?V^@dRJtd86#ZX}xUiBo{T{sm-tF-3Az8>i&kK>jxi{YD7LNU7_&ZpV+3h*T7Na~^ z)KEm+fPqL9fS`n*jfEl8HpweKQ~M8M;|KO9i&iV|QTNpL4Gu{jV_Rvk=Y^vkkhSA# zo7>m7;)SDO`G)lJWH%|5FPC)=Hk?*@O;mr0OX~*e$R=&w3piD1CGLlOJ*R^Hc_66F zI9SPzHF`(YOgViu(fo~?b>^D4OhW3$ak}aI#L7{+nL`Pamr$I|^F1nDk>Vj9LmlyT zmG=QP@N1z%A z6Y#{Zd4DG1q4Hb63-_v>@A9)?4|byuj1jN|w|6^RAI>Pm21|;pz#i^$WCfj^d3P4PM80gNY?1^<$djP}^>CXClkoCq{-Iz5STm%Z=^ZiteP 
z81bNkdo+{b#lugpI(-dsW6u!o3iad~%U~>zn&xe~p84NLWev~=>a*k+ADJpPZGfK; zx*cXZo;6pAXWh~G!Wdd^p;fkqVDAg}MkTtq9ZCive?U{SW&*2(gg(y*Q^EcrE6xF(@uKo2wB0*w^BXse}9r_TY%2x3j_*TLC0 zmdv-+mgdkI93qP?Dg~A6co!R0D`a_Y!wq`>k%Wos8JAMc5(zRf6h=7#cNRvhGgAvY z;4T3n1oRk~3&Xu(dY1wHMEz4N89uz3-k^{TM&!Sa?fI$#yDp>DY?q1_DXr94RvUm7 zRdU!!3`Ge}JZHEcIqbNDz%0@@2mDt65{=lJgOqKoY=Y7gP(A*>bUNRVi7%4|qS(sv zm#7bqM+QNrMNtIVa!hF@M$;uGgu5rscOYw#JAVZR4F>!F;g`tkNKARHq&W-qU>wgN zE0Q^@!EXjY6-}I4Ey?FFX@^D+oCH)wMi5?*gqoXOh^-@c7xsdZrgbG4QQ&v;4Ueo<1YO?P1_#t~S+bLhTY7jsK$o zrv6Bu@PMAX$f5wvIii)|@Pl35^5guaBy9+K?iHan+p27gk|3v%R7ZPJYBkcehRcxt-Z@OGlCw3}Az$9^N7H~b}0 z)8B)atkbq3EhA4#cyieFNMR|dtjqH+1BLz6t*ahQB=+$~6aV`mKAKpFyNwU|I888f zqxdc~IxGu^?+ttn0piTbc;i9 z)&Ih!Autf92fo%D7!(%1YRYy_QpVE2vYhxkmR`M=uH;Sx=f{5gFt59fiAM<9aHdI6G1 zJ^=_n`CH5HQdCI7tbS>|)J`$9ZC=!RSHP;HGY=a_YRl6=4T2fp@eqHDxH{okqb zHDDb0e9TvLo>qQKc?StGH;Jj_C=4p*Uf_Z%ub0F2kioyD&GtwEJ1%7G6tt=vp`ms%K6$EJ=XU?Rx1K-j1|B$cHYQ*Kd6$!@3iFI`0Ra}tCj zmXRWz8p66kcH~#uVL5qmXDxBu&SLS~65jDP=XuvT0MaS~@>@|>BJ#YssM%$4hy za^Re)()_?Zt60XGWR`$AKBy+WUaxEI_1u@1xU&|pq;PqQnA%XAP>~)-`Yoj?IRU>d zPFSAk3@7EREB=k#tL9%xFl6WABm^y3on&%5+9$@4so5-UN`}(hmy8t?OyvJNh?EE! z5-sC@CXv#d{`W~_Wu%k;(QZa1IZ~l!p-q+X^1I} zB)b6|83+aM9KSq`)HkIOX%<;H!60`1?>tiZktbUt%_1YQ;3Wl~Yc;(wheX0N&ORyn zCySS+FwXKrBxKqt#~iZkzr#oTfI-YnO8RI?&hY_TLwBenWJ9Ye)YArKh9BF!Xj^Px z!gwmpCJ~WRfbElLba}b;*0u!9Sc7ap|8fTFoi7UpGACw{E<0#N^2hnDBHlzPt11-Q zp>r<(ZnXCNzXiK(sfR_Xq|JWvJz5?=!AY<3uzHrQ$YZgWH#!DPl`J|~%xxAP#$2}e zu_>5L{`;}usJWHR3KU<-){dp&9Jf7`b8Ybo7F{8fevs<;U2wtK&Bb{Id3)k9{$RpTgoOVw{svd|27)k|JA}GtP-bzweb0#YNSabD9!D3%T>c zVSGcd&6^fKe}^=8^)g=4W4>M7acXH&JPU^-6F4od z9Dj_fs%P;vRk#FGOf&YKh~w{=9@)!PI2h;liK(Xx`Iv||A4&LfJEz~y9egBlY`yrx zl#?3$Le4XM?+7%>AH6O&nFAi&3(7lz1(vlBQ%jOcqE+VC>Q;g~@O&M|Z(dp&wN9hf;+k6}>(6B*Vf8OS=p3&;J$gKR%7- zc>`}B1l0H^H?%P$(<)|@z)T@eJ$_LwUQ}ZaiglLYi^Jyj{h{}%b%uv z^b;w!wXw#M7G?)Za&JQMJ5z{vY_9ctbv1%;mx@c#IglcTy&^jPo&w&?9XUV6be?{% z#(Oy-;Qy47B$fPWm*#i<{*IAX(~!W7wcU3mb7b$=XWMk@ky{(a)ol+GUE%Vb=F9}l z8n0M&-DUkg1fv{ltSqbiZTx1I$1pp>Cb!eKviPvwU&d=oaPR@+70%hbgm#Kb;#d)g zu(>>EHqD2;IaY)WRYXGVTt#H45bOQG1d@Lh;ifWeY*3KRpIs^aMQk|)t?K{(0slWo`0|ofz>_cQL$ztkQ$&Ee7w+Wg zB6aPn$+XSRFUd5=^ILzhiYup#|6={`$D6-cH2}l8n;i7*sVoF6t-n~2tCw{q@{85) zB=|b(FIL%slrJk%^%bePQgZEo(+J+lbUvF%KbWdh)5rL#W12Z$DQtNnk-*zl(BX9# zINkiBWL<%Q)sQmt5sM!~$JYeDAYN}wmMfUPw9C;1NErL0QQ<=I$iHDkrici&Qh0!d zo%E0yJo_>UZ$3Z|WYV|w^m9FFqA#Xo52EOiNcJVzxjm6=gu63{{RX~kzq1EZybAah zP-#s#!VtWKEqE!yJo`F^J98vma&Vacz#%DU=Vz8lu%M_EH^5h?*`o+3Xl@MPUk#xy z>T$5y&5hhvV~*c=S@HlOrxvlG&`{>8|4wYl@6&`V8Nvn*Ykv2n*zw^u_(ou5;3+Zz#WKMoAn0q&pK$0E~74(}>acbC_l=I3oky zJ;Tbw@RFHX5Rq(OIBz8Kb_L?MdWBK92HzkBG7Q^2yTqr7#BF%UE3EO5u)01IZHIYC z*Fqa6J<1#me24xj!=y(SURQHBG3G$r?qhODpE^XBd6~mmGm%_^TeVP2S9t&&>J2fQ zh4Y@AeHa&Cqe9yN5e(0HUdXy6Ej*K0A$ zMH{q-=Zn}F0A!!m(}yHv4YMyN!_@5_g!OX(s*nx{+4~0Mehs>P@VXxUI?Iw4T8Ui) zC_D17687Q#tX7=V4iz57m9S_&9cLLCi`)r*Y76m~7xF#Dq#|S?A4!|7>8n? 
z9PEtZ%*vl7ie*QLsV9y6t8U`=8@NulLV!-#NN88#NG%L4$11>5J`;Do)AT+s&Na#D z)vT344hWsIWYc2K zZj#vS2)a0ZuWA0Lrg#-<;*LSyVllO&=^Bp6WBPm8GcJ15OyInc_+($eOk?|l z!=|hG%?V7od1DT?z$-Fj?YSo($E7zIH1LXf=C3)%_V5T(MW;ylZ_g}7lOZ;8S|Sic z2QPj!Z%e^fB^HJr4eZd<)7E6S_Hk1C^{r#po#->dnk0l^bFKvC8T}JYy~>6LnFJI3mKDc4QU_ywMO~p9~TM=q^S2~ zVfUvi0aYTB^{fP6>k2YYCw~i`8jYVJKV@1eYi`^U#wCZ|b!-_;D@Capnj#B#3bRVm z(5nDexGNMo&vCZOm81~_47Laay5cTy4_bDSlbg-z zA}qk>!)}_VM2A*cM$EL{e)xOKs|6KTyNSbjwOfDJbnAMzN(%1?(Z}srbWXoE-3J{3 zaP>&Dw&z1i?KkfYEq0Hf--xg-ueNwguusxXayqmkFu)M+YnjJ)v`*Y4S z;%paRpF|fW1u}5Bf<)3`Hs&mPHtFTXR{qB%`X?^_FeQkN#pz}#++%DI@r>#lzwAuX zcP25nl8%!j^x=F!Q`haz2!kN<9lx!)^=d|dSM`?SI* zxzcnQSsz<8%x>H?Fq>dF5pf@?iMWq%`I+&T(>`d>|KGR|QTRV`AGbLW_t9f_4!HcU z6v|(4K3Zs+;!L!yt9@=CN^_V+M306^1O*o7fY8~2Q?!Nt zu`oN-0lz@h2<>${m&gWf)rh+6boJArT3J;eqbv)#`i!Nb@d{ubLg?T|ySiy^u4XXO zXRHz#ovI}a>LZQjXu}X(U^6dFVHF`;b%-f$>U5zhEFVNBM;M3b12ikx(YJ>Fsaw0I z3=A3Ert%eqk(ju~oxi34ljJp0ej+{Yb4%ERQeRmU+Fwpj#YpWlc`33$g#MT)iw+Al ziRUjGOM|_aGmJb-X+fD}e4uY62@fWyFtWoo_pL%`Uymm6yD8~o%A~A_STAj&FjPvG z)K!I~-S&w2WW0VVn4Cpqm|v%)-@~&J5t1Kf5JOrE8XNQm? zEdZ)IR~pWCA&To?c&7hiG2Q8>n-ZWlUYx{FO~h!= zRR%By_^E-#h(T7gqmfGgfgVwBU`VPuCeqz$k%ArU&-Fe>q{&zZ-?LcwEdh=z%<&c~ z(=KN@gkt)O+6}Xdto@78RxBBxl=Fr4pRCh1S%Du=s(S|hg5+BM39e)+c+c|pRI@Ra z>&MDMN=>Y+KImX8j3no4Jgn#IHtdPTO!#4m1oH zxaDB17zyRrvK_YCsYGd*6~dv_0IgZc-&BUiU<<1FpV{9P3)2eKu- zd{>1QY6Dm=^3CViv-g{O3o1^ad${TZrw>+?RC3_x6JK(SdG5`-J&-KF$7wJpK~G2r zR({1_tAx)A#vb!tDtc2>%wL(`(!(aFKCsH!!=Bt#k`n*UF`XNu?0&sZNt*bNW|#QM z*wYUrD%yP=1BVl_0jOiq&s4XVe@HLh7%x1|{^zL*|3ExxH{ZP&g9PiyUEG1nMa(Yi z1BGKfxGcBF`^9+6p?G0KgMX}0T~N{-|M*+pgOg#2=D){igD+UdKg#jf;|Ts!3Etxs zD#&H>^9($jCbqXbR<##&7T;IFzukdG<}}Xx5J&fUxr)qSuNZ6>2bbl=^#3X)d4IOa z7Dws6T1{+VCiKl`ziU}fG?p0tT1M|Jm4yf7XY?tFi~d%jZ)Z&nI;-j0z}%B5LLHFv zZk4&TvcgQyD8`?u&_7dQ?Pr;Nh79MT)mXs@3)!!kM2WH0LzpWJ9)Wtek%S|+xy8>} zrb<2L{E;n&g%Qhl8v%po_^JBrQ?lmGG6M0lBF%G9fC|IFzu z-VFrlQB2jxHxPYMJX4;0qkc|2tZPiVe-lw=F#3`;B%tMeiNU0j&s{B>Co`4KEec6k z?LpVep1;SCqJE3d$6CWZ1_%h7IA0V8mEPH@7MrzJh{e7iMh^ z=g50UFA9Sym08L>qZ`vuTJRU<A&SH{U%S^ZCnIDKxGfyNp4ut4A0o&JBefLmZuJOta?^*W zG1+HogcP1GRwJuA_Wjb3@lNo_#<$I3ip{*SXV=B^a2u6=*Nb%Ahd{x z3GU}V(MucahgCGJ)m$bi2LB0uWd;2roxW@^{AViNZJd&RYvWTB`tNv43xy`5gXQqut{*Fd-2!Ms7eDWg^LO1dF~ zf(yFcMU_(j!1|p`YpaLuu$N%*X5TNiKgno>|ZIj|H_x^mRch<9+6_VaUx)Zy6tAcMpl=VAzFwe3O+|b9$PfD~{sPtZ7=5l2bzWrw^Kizv-c&^TqMmIBy0}Gl3Hy;_gC5k97ebZ&DxA1ObWe?H3%W zC!aCB!t62yS%}F!W#tKEs5~JnTfcdMsey%yoQ|2w%d$S!-^a_Ag=lIji#Wod4=N*J z{mM@WJInh*{Hb zX~uO~hW$>Ta5$ZRIVt&BV+`68l;*L4%74{NGheuh{+fp6huMprm2vPSzZ`aHM$00q zXM_mRY|t1!WXpG%tVQoahtoDV*8Kpqn)C#cCVQwl4WR|-E`ni4k!6SUeLK(c< zcP8=6b@VlhswajVMB&B-5^d_294s;+L;}&n>D0vq*30cEg?CL$>jxEM0*vuu7a3qu znC-I85Y4cSo~DBq;Ry@rkf*|&-==>iY4p;)uX+}q9$r9?1uG!6OyQFf&CiOsHT-D_ zR$-yFSN|?@X!9t^$js|*_q$wPUqRma$E`o7Ud$}n%@h0AFPpcN%R_g^_h7vG?+(k# zH2xHCIG1LflVqN$=C)V^JZOr=0)#G%h!@Y4LfWkx*ZbQO@oD~MUSF?OQu7hs^7cBN zNpEK{MBz8+Xt}>IbXP-{zCBnp6aN1J2(LEWYYbx8EJhcMQQgPQ|?IP;k};*iJ5 zTu$@=?Q-e=nH(>3x)?2S&|kH(kV}rg$*o)om#H+!3yE9a${COu>S0P`Zl633zg8U5 zBH`_Db}-tim4s`;ufXx#9JvA=2;!9A!u^$lm?g-x%3N+Hjw|GD!NZxHopt(*OcuVv zA#-@~^Ih^f4#~XoKjPt|+{2(3atb_pKRz`F1ux(v@+Mh zx$g0MnSy)iK8PCnh}Xv%?lbRffg;<6?DC6VI@?U_$S#%;b|kkdck-4aN8dU8JDQ1CW(n82k**gtABO<@ShQA^SW(HxR;|;m;Y~N}bH(39h&NGxJIZVlndbmO*&hBSJre zc_naA&U}7}*{aXrTe&E#J60-IA0R7Jy7hp^WkAxyJ7qrKmEo}+V_2lUt`r9 zi%qljBtfrE$U11~6Gr|aSB!XNr4R}CXrVg6NIgP*OZkOLo-qQZ918Nwd8xZUBY{5X zR6woQPv2MRx38zORTv9*@UuOTLshnrnP*CZ{SdQ za_xaB^yTY7^s_m>f2P^2e=?f}=P?C=Pm;m@kj&~xOL5RwmHf)7$v*Rvl)&ba3}g9e zvYOMH)7McIyDqRu+&8JDg(N>zb8c*x;q}g6?-74!X8t0dlfKE^R>WI^a)Y=a#WZQ` 
zLZQ9Dl@}U}F758j>+T#tQTCYuy(RT7UTOL=0!unQ+oXv0!rFGWNoIHOr`Yir>5P#~ zyE89@Z;~e6Luun-vM3D7pTkX`+HCCa5MvJ5y9@YLSC03RMZGvoPNf!KE-4nMnq|GB zgSOem|3RYvAXL|)$iwS~a#+VeQZPP9qJu=?hMkZ9p&Pn%40vIw^SNC%Cla6R<6n?< zUy$-+vhdQeuVj%m7v^&O+Pug^RQm)UQ=Wru#$mz1XO`WW#z&l<&X{dVA97leB z9^O%x+l%+k2ZBm|KEFL{{OL)ZPfuF@2U)&{S9ajnXbTgBB0#m?yHxB1$MuwEd@1Q% zN=#)!8LM6CaJutd1;R>pd^zb{PSo$Q4!aB7EPN59F^Z7Omp^WZTV;qtj#^$9(2tRU zysk*p;Tn&SRS{DB;beuwDcWhKdVG7_s_k(a+-U}Sm!2mwhe)wAmDBO3na-yfx5EKR z0iFnWd3e&y5K!b8BD%cd%JE?m#Tm3{P7M7IyAA$SZ??mc<3(b60iCg{%Ay`$!Wx#a z8Yw<&>A**7ZGmVnu=^b2x5Pzni7V-cjnF=Rk9_Mr@*Rj~LXqkkUpr-J?G)k=Z-`6F zw>$CyQ}&IYm^5@^lE#kZqB|BiDszcj8JVyb&HNZ;&4x*8;b)U!-*mECsj_~(L~4@h zXYG-J^VC$OfWJ_rQf7{*LE%j*{LGtY zq?)_jjDw0vJYCKq#m+JV30nLO6ip9Ws1UACUe4%ysD8dPbZGp=CHN?QafzZaFuPnY zGxN6@AiE>|6{mSxiEk56^UlmELU$tZF&9p}z&AX`7#0nuw;5hel(+eh@nV^ZvP!zm zj~fJwWeuzctulQsSt;I2baNdKf`uK}T>BeOKTRunl0L6VqPA>xpLoL(M(ZrkrZ1^M zJ2b?0KoC~fP+D1L>M!3xLTeM5U(iZw4|g!*j2YxZF@s5-h8JLeJenPLyAv)NxW>FV z1H;J@{!msuOmk-{svytjAGC@tfd7z-zSRF^lKISbe$P}=Mt_1ks-LE3^A)q`0$#tr znkqfA4lmW^AdncadhYSSn56b)>r$%wI^kMVCSHlz%6chD=1t@4P@Tjx#q@ zHkR{g(`e~V^vhAe{QXuawb+at7UgmYFehb@_Xg@QmIb2J*jQQGXe`H0KG)cu*B6<~ zcsrSbtB#a2z<}RVPO6!E92LPh>6*1j$mO+Y2=HNu#n?*+a~&WqT1o^$zsYoh_oG3~K_NHzI37W8!A2*?b^_m~{do z5tN>iCb3m^UCZm6wTKYIQg0vgp|9|72-oQCSuD~i_};9vnqXH1wNjA&DWxaeF&H#v z&lBg1t=(u|!^?!>?p!n(qQ-1>wx&H441xHh$dUDRQMm|CHBPZ0{g;p5Qw3E3_-l}d z4WFFi`3u!tOIov*>vv@8&!mvmjv>s=#%rA?_tNlAuSrN-Ww}iw1*SEW51Fsk0AtbL9i}hs{^)UGX}Zi@Nf%8gb*+!=W-9;| z-qS~CXIU1ZC#erJr#G+)_a(ShXj4|+KO_b?B**ulKaehVn==M z??L~}-`^8l#XrYw_hWJLS5%Zcr6}eKx2K?@qQp~CQDCpYU(l{8b(g@k2J~N(u<20i z&P3K+-AKEaK7WK5+T*vap_aA6RvXD1-HcI4#w~Ap7(>L)^<$|i#&;A8F4hWp@wM^+uVPX4@jyH+_Yzd#mRUfe^oa$cTd zH>@Z%JmJLd^7XA6<3v!Dc&gsSp?C_|`DqEsf27~1(a-G6i-xOn^gLBa_BQ_XRtz3s zDnWG+UL8ImEK5i(Q^fi^R+cl#1u^FKUh4>njWu&A+$k^}JNx{hK^wiQL6H{=_0qT-*>)63clRmnrY`T1bo#T| zVhvydF>>N$W0`2YaP=;<4PvPCasGAS4ArjTbGe;Y#DUut$TgHZc;9pu>I~u2t?UeA z3D9G>8$=N_fes7<^n2HEL`iviol$as41{6)p`0xZH6ps-K$@1+!J-4Fb8H4OT3l{= z-#l3`mxk(8(5@iW_?oQ=*49SP&lAK%Jm30|^AAKd1Hisq5y^)$jE&0I6f=wi zxw;l@b}yuwkeJ+9c5SB~7BK2=KMt{>DV9i=7*W~Nd8)46P6 zxY&P~rT!Ld7jnma?0r5qJJ;CX59x7%&EugbSXBVMlo)-?<1=Qva*QsgG24+-m=}iZ zMWjbD8T2@E>5l6Gbt|#4uriFS#9%SnCof8|d4)@*LAiIIWi)of&9>GLXc6^(yYMYIK%CvF(2HWKi;RYKDOsKc4CM||VPSlwE%g5+lcZdHjw%wq;wowkOgc z30swft=PsIX-1=wWQ=2D6KoDoFJTKC+(@?ZBYp_k)?$I#;uhjU1Ezr{O(3mX9!XDt zQ_{06n{8;>ODpgQ#^iQ5B+1dJCJn|hg5m)Mnt3XyX|A1I-w;^#PjMR9x(2!neumvM1h!*dscDE@Il|t;v zjMS4Em9m1jTP2WZLaM`G!Hyc@M-8>`NBa|fu>mQ7$G;lV17FNpwaybT<=tgYuob8L}@ZAz!z2dH9M6RuPqmscac!#Wf z5ey<(mZJg3(S6vcT?tYECQ(?$>(K-e;C*QwI&*>#N$zH%u@hOT6In`z{DX;B1cjHqCJkj=*vib*%1q@Z zDZd0teSmSsEpT)1CF@0tiFWvSDZuvUrT6CnBJaD1G*oud@a>ZZ{g8$gVc_4`hoAu%rhxf1)>$0KZ^z5m8w#pUUNZ=ZivN)u%6`3WSqZX zL3h$rANx^d`bU*9xAgryTcjMg&_G0Ns$ti1;@5I)L3FWJj2>G1NbmI@SR#@P7^tZ3 zM$8-^B4EL5{*Jg$p<7J_L0{OHbbJO;F2L3V(rW_l*O7kTD}@76SPB9~Eo?J$|I3Q_ zbP=y2SnZ1?{YpquBsIh?msVOg@##$4ZQWV3^LY2IHhGs+31TPf~sVNRQ3Jwc2u1ml7~z2j`rGsXtj}8_e4` zn5Etv^r}_6kRe!-y`?0335qBFLU4@+#=x@*{45{>%N z;ZYtsLJcpYthG#Sds)bk3K=4Z{-GV=N>KpHK*?d7r2rOzP(+xlVJ{fgzhEees(wss z!_`<|EQ(5zsDu(S?6u4-uVsqo)gl#-R&db@nW*jI3S zVVAh%CC+?{9uJvL=^`tkLtcDg)TBgrvz*ND8x*Bb9QntK{re;!JI zF6BeV4do-J=ob1>-%V2U=VBuidN!V&9?u4tv3IOM2_vXxk7aCmEW`Px!-o5B06HDp zSv zSw+!kMUnG{$%d>$7{nuNXK89@>6y|3$Vx~xU+bX1E1<8=M!(O>PG@Bs(VLxraFd`z z{64GCiPz`Yd-!0Oy<;DJ2TxDg-w|uWW_EP21}@#eUB61YE7sYmubi&4v-|Q>_vItu z-PW7%4-5!*L8K3*^UK4@!&+yMKsxITtFeYoHCu~9i)o(uWmgGy<^JS2Agp%Oz_df$9^ z8Qp7E>{J||?Kvu)c(vZrwoyv-^ouV)vSkhNzAZUYK4f4%MQ;V^PZmh0>ieF9xa>UW z^YqO{X3UzyPm#i=(hhq6qipQpg|UOC6|tcn;hA3N9a-0g540H(Jy;#uZQ1N{|E1=( 
zEZnW+8m(`&u!E+>Lp@!Y^_=7mSwLjar{=f)2e!&utPK# z(%9IZqI<{Th|wM+ncj~^iF+-#pzu@Ms-d>@FvjlbD}}Q$>27s%+N~64SuzBB;e*PB zgl!2ttnlm5p3K}^R6P_D!w1;%Ejt(4lerv?iv`Po?(mOw|qlFJt+_zge^d_o$ ze(f7{zAXG*1klxA&*OeWg?cfna@3oVuq;mwr(-U9=q;W6zkH#kFV>D(IQz%+#mJ$b zZQbg&NEfRMT+JvxMQ=A${rAGE@w&B5b?dLZC2k7Q)wT4aE(k5%wN&dgA=RT_{N^IQ zuETS)yL6Mcr*5jEAU*#ej}D-mh3BS6qCc*F_ee!(zV4#rQ$|FK!bGF^58&1{3QItkGTbp8|a>p<9`;coTOVH7-$Q`IEqd~^-YsV}6c_3TjM@S#Neq!IiNS{`XXMv_Zk=gt7V zk`#V$6G=~?{{MEqW4PW~T_v3hmRr{T%m9Z@zwnm_gb6R69UpIpL(ET4*Obg)oyCzc zd~);w%!AK6heE7AaXEPB&hJFF?r{D?E%#juPVASLE};u+6-A}|03Kpt z6;IUi13lJfmi!HydWt!&)2T)CBhUZbxVm{YZM%}$BP}Q99rRe8C%K^TpN4iH()fmU z@%Goaa}(o>D`z0kl~WLC`pQ;_5FJDGPZx!!mY3kHOmm=nYx1Vy(Gxo}Cp^z+%*=&Q zWKi6^U>Y{f?A(0&6mD90<2JJ5T*B{h{}ozS{OXhB_Ww$*ekt=F$DCh;khgg#iQ(Gl?Dwk2YX2a<<`@Bc$U}vfNtg88gpLjl8XF|I5fN*-V zq>+LZFklbThZ|9%IEP;nqi2_rwyjQD?7>uSAxG~Tb37=8)6q4UGlv3NilL>MGisg@ z?pupj&#PNUhLr2?)l~EJakKEIS@H9vb^Fdf+HDk`yy}_A7p|FqxU-M3+UZ{}tJP8I zr(OZLMdGnUn?)@ran+z15<=$o$~CA+;hW;GJ4snlT0(8YE_tUO-l zd=6eCR)YUg>#hz3a#HQnP5ExXdC*oo$JvYQOT5M+tH~b9B1ZevTC4_qApX%Oe$(YO zR@>~r=}i^HnW;Q8T|B1WR)UWZYsh{$SUYvdias*L&?XQJBB;XALu{pr1V7%rt|BXc z?@MMjZ~Chan_&S&i)x$kySMscdV8BO|JLX#{nhm12P=xKo{Q1e7S%q#9`Qj`1>)D=J9{2~$+xC6&Rp>orW^wH47ubV! zBg7WHchG#jN!Jt|6|)cez&YS+ZR><$5R`(hGTl+u<|^4V^>^76BW0wV{Kh~Cs_e-B E53ICynE(I) literal 0 HcmV?d00001 From feea154e89bc24d0b408c6927230b4987a6c4357 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 11:02:38 +0100 Subject: [PATCH 30/44] remove working dir after test --- .../eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 50fe73d9a..477a4326a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -4,11 +4,12 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf +import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.{col, count} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.extension.ExtendWith -import org.junit.jupiter.api.{BeforeEach, Test} +import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension import org.slf4j.{Logger, LoggerFactory} @@ -20,7 +21,7 @@ import org.junit.jupiter.api.Assertions._ @ExtendWith(Array(classOf[MockitoExtension])) class DataciteToOAFTest extends AbstractVocabularyTest{ - var workingDir:Path= null + private var workingDir:Path = null val log: Logger = LoggerFactory.getLogger(getClass) @BeforeEach @@ -30,6 +31,11 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ super.setUpVocabulary() } + @AfterEach + def tearDown() :Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + } + @Test def testDateMapping:Unit = { From 5fd0e610bf1cb1cd6f8e80e047602fba2703ddce Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 25 Nov 2021 12:10:45 +0100 Subject: [PATCH 31/44] [DOIBOOST Process] fix filtering to filter results with non null id --- .../src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 016279787..0ea4b66a4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -146,7 +146,7 @@ object SparkProcessMAG { .save(s"$workingPath/mag_publication") spark.read.load(s"$workingPath/mag_publication").as[Publication] - .filter(p => p.getId == null) + .filter(p => p.getId != null) .groupByKey(p => p.getId) .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) .map(_._2) From 1e1f5e4fe08be8bc81d23e1c1d40d0886fef00f4 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 25 Nov 2021 13:03:17 +0100 Subject: [PATCH 32/44] minor fix --- dhp-common/pom.xml | 37 +++++++++++++++++++ .../AbstractScalaApplication.scala | 37 ------------------- .../application/SparkScalaApplication.scala | 37 +++++++++++++++++++ 3 files changed, 74 insertions(+), 37 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 4a13b3459..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -21,6 +21,43 @@ This module contains common utilities meant to be used across the dnet-hadoop submodules + + + + net.alchim31.maven + scala-maven-plugin + ${net.alchim31.maven.version} + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + scala-doc + process-resources + + doc + + + + + ${scala.version} + + + + + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala deleted file mode 100644 index 44dad93eb..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/AbstractScalaApplication.scala +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.dhp.application - -import org.apache.spark.SparkConf -import org.apache.spark.sql.SparkSession -import org.slf4j.Logger - -abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { - - var parser: ArgumentApplicationParser = null - - var spark:SparkSession = null - - - def initialize():SparkScalaApplication = { - parser = parseArguments(args) - spark = createSparkSession() - this - } - - /** - * Utility for creating a spark session starting from parser - * - * @return a spark Session - */ - private def createSparkSession():SparkSession = { - require(parser!= null) - - val conf:SparkConf = new SparkConf() - val master = parser.get("master") - log.info(s"Creating Spark session: Master: $master") - SparkSession.builder().config(conf) - .appName(getClass.getSimpleName) - .master(master) - .getOrCreate() - } - -} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala index 247bacac0..6541746b2 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -33,3 +33,40 @@ trait SparkScalaApplication { */ def 
run(): Unit } + + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.slf4j.Logger + +abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication { + + var parser: ArgumentApplicationParser = null + + var spark:SparkSession = null + + + def initialize():SparkScalaApplication = { + parser = parseArguments(args) + spark = createSparkSession() + this + } + + /** + * Utility for creating a spark session starting from parser + * + * @return a spark Session + */ + private def createSparkSession():SparkSession = { + require(parser!= null) + + val conf:SparkConf = new SparkConf() + val master = parser.get("master") + log.info(s"Creating Spark session: Master: $master") + SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + } + +} \ No newline at end of file From 29f69f2f89d55213b2e07aa618b9113299c76939 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 Nov 2021 15:22:04 +0200 Subject: [PATCH 33/44] Sprint 4 --- .../scripts/step16-createIndicatorsTables.sql | 405 ++++++++++-------- 1 file changed, 237 insertions(+), 168 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 0ea4a5adc..15f2f0996 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -8,7 +8,7 @@ join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo')) tmp +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as @@ -41,178 +41,178 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; -create table indi_project_pubs_count stored as parquet as -select pr.id id, count(p.id) total_pubs from project_results pr -join publication p on p.id=pr.result -group by pr.id; +--create table indi_project_pubs_count stored as parquet as +--select pr.id id, count(p.id) total_pubs from project_results pr +--join publication p on p.id=pr.result +--group by pr.id; -create table indi_project_datasets_count stored as parquet as -select pr.id id, count(d.id) total_datasets from project_results pr -join dataset d on d.id=pr.result -group by pr.id; +--create table indi_project_datasets_count stored as parquet as +--select pr.id id, count(d.id) total_datasets from project_results pr +--join dataset d on d.id=pr.result +--group by pr.id; -create table indi_project_software_count stored as parquet as -select pr.id id, count(s.id) total_software from project_results pr -join software s on s.id=pr.result -group by pr.id; +--create table indi_project_software_count stored as parquet as +--select pr.id id, count(s.id) total_software from project_results pr +--join software s on s.id=pr.result +--group by pr.id; -create table indi_project_otherresearch_count stored as parquet as -select pr.id id, count(o.id) total_other from project_results pr -join 
otherresearchproduct o on o.id=pr.result -group by pr.id; +--create table indi_project_otherresearch_count stored as parquet as +--select pr.id id, count(o.id) total_other from project_results pr +--join otherresearchproduct o on o.id=pr.result +--group by pr.id; -create table indi_pub_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM publication p - join result_organization ro on p.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_pub_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + --from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM publication p + --join result_organization ro on p.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM dataset d - join result_organization ro on d.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_dataset_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM dataset d + --join result_organization ro on d.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM software s - join result_organization ro on s.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_software_avg_year_country_oa stored as parquet as +--select year, country, 
round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from +-- (SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +--END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM software s +-- join result_organization ro on s.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM otherresearchproduct orp - join result_organization ro on orp.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_other_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +-- from +-- (SELECT year, country, SUM(CASE +-- WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +-- END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM otherresearchproduct orp +-- join result_organization ro on orp.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_pub_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join publication p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total; +--create table indi_pub_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join publication p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofpubs +--from total; -create table indi_dataset_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join dataset p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total; +--create table indi_dataset_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc 
+--join context c on pc.concept like concat('%',c.id,'%') +--join dataset p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofdataset +--from total; -create table indi_software_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join software p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total; +--create table indi_software_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join software p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofsoftware +--from total; -create table indi_other_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join otherresearchproduct p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofother -from total; +--create table indi_other_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join otherresearchproduct p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofother +--from total; -create table indi_other_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from otherresearchproduct_datasources pd -join datasource d on datasource=d.id -join otherresearchproduct p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total; +--create table indi_other_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from otherresearchproduct_datasources pd +--join datasource d on datasource=d.id +--join otherresearchproduct p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +--from total; -create table indi_software_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from software_datasources pd -join datasource d on 
datasource=d.id -join software p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total; +--create table indi_software_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from software_datasources pd +--join datasource d on datasource=d.id +--join software p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +--from total; -create table indi_dataset_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from dataset_datasources pd -join datasource d on datasource=d.id -join dataset p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total; +--create table indi_dataset_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from dataset_datasources pd +--join datasource d on datasource=d.id +--join dataset p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +--from total; -create table indi_pub_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from publication_datasources pd -join datasource d on datasource=d.id -join publication p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total; +--create table indi_pub_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from publication_datasources pd +--join datasource d on datasource=d.id +--join publication p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfPubs +--from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -231,11 +231,40 @@ join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp on p.id= tmp.id; +-- EOSC-TR1.1-02M: +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. 
+create table indi_pub_has_cc_licence_tr stored as parquet as +select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +-- #EOSC-F2-01M_cc Rich metadata for scholarly publications +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. + +-- Same indicator as EOSC-TR1.1-02M (Najko's instructions) +-- create table indi_pub_has_cc_licence_f stored as parquet as +-- select +-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- on p.id= tmp.id + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp @@ -270,13 +299,53 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +create table indi_pub_diamond stored as parquet as +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +from publication_datasources pd +left outer join ( +select pd.id, 1 as in_diamond_journal from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp +on pd.id=tmp.id + +create table indi_pub_hybrid stored as parquet as +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +from publication_datasources pd +left outer join ( +select pd.id, 1 as is_hybrid from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +on pd.id=tmp.id + +create table indi_is_gold_oa stored as parquet as +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +from publication_datasources pd +left outer join ( +select pd.id, 1 as gold_oa from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) 
+where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp +on pd.id=tmp.id) + + +create table indi_pub_in_transformative stored as parquet as +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +from publication pd +left outer join ( +select pd.id, 1 as is_transformative from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and ps.is_transformative_journal=true) tmp +on pd.id=tmp.id + +create table indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +left outer join +(select ri.id, 1 as pub_closed_other_open from result_instance ri +join publication p on p.id=ri.id +join datasource d on ri.hostedby=d.id +where d.type like '%Journal%' and ri.accessright='Closed Access' and +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id \ No newline at end of file From 0b4163ee0b3bec77bafe8811b7ee4f7f2a02a7cb Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 26 Nov 2021 15:58:01 +0200 Subject: [PATCH 34/44] added sprint3,4, removed 2, chaos --- .../scripts/step20-createMonitorDB.sql | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index f4fb2a174..fa8e4c6a7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -105,23 +105,6 @@ create table TARGET.project_results stored as parquet as select id as result, pr compute stats TARGET.project_results; -- indicators -create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig; -create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig; -create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; - -create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig; -create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig; -create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig; - -create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; -create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; -create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; -create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; - -create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig; -create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig; -create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; - create table TARGET.indi_pub_green_oa 
stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.indi_pub_green_oa;
create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
@@ -137,9 +120,20 @@ compute stats TARGET.indi_pub_has_cc_licence;
 create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_pub_has_cc_licence_url;
 
-create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig;
-create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig;
-create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig;
+create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
+
+create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_result_with_orcid;
+create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_funded_result_with_fundref;
+create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_diamond;
+create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_hybrid;
+create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_in_transformative;
+create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_closed_other_open;
 
 --denorm
 alter table TARGET.result rename to TARGET.res_tmp;
From 25fc8abf77c512e650164972ba5cc5ade366b112 Mon Sep 17 00:00:00 2001
From: dimitrispie
Date: Fri, 26 Nov 2021 15:22:04 +0200
Subject: [PATCH 35/44] Sprint 4
---
 .../scripts/step16-createIndicatorsTables.sql | 405 ++++++++++--------
 1 file changed, 237 insertions(+), 168 deletions(-)

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index 0ea4a5adc..15f2f0996 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -8,7 +8,7 @@ join result_instance ri on ri.id = p.id
join datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%' and 
(ri.accessright = 'Open Access' -or ri.accessright = 'Embargo')) tmp +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as @@ -41,178 +41,178 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; -create table indi_project_pubs_count stored as parquet as -select pr.id id, count(p.id) total_pubs from project_results pr -join publication p on p.id=pr.result -group by pr.id; +--create table indi_project_pubs_count stored as parquet as +--select pr.id id, count(p.id) total_pubs from project_results pr +--join publication p on p.id=pr.result +--group by pr.id; -create table indi_project_datasets_count stored as parquet as -select pr.id id, count(d.id) total_datasets from project_results pr -join dataset d on d.id=pr.result -group by pr.id; +--create table indi_project_datasets_count stored as parquet as +--select pr.id id, count(d.id) total_datasets from project_results pr +--join dataset d on d.id=pr.result +--group by pr.id; -create table indi_project_software_count stored as parquet as -select pr.id id, count(s.id) total_software from project_results pr -join software s on s.id=pr.result -group by pr.id; +--create table indi_project_software_count stored as parquet as +--select pr.id id, count(s.id) total_software from project_results pr +--join software s on s.id=pr.result +--group by pr.id; -create table indi_project_otherresearch_count stored as parquet as -select pr.id id, count(o.id) total_other from project_results pr -join otherresearchproduct o on o.id=pr.result -group by pr.id; +--create table indi_project_otherresearch_count stored as parquet as +--select pr.id id, count(o.id) total_other from project_results pr +--join otherresearchproduct o on o.id=pr.result +--group by pr.id; -create table indi_pub_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM publication p - join result_organization ro on p.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_pub_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + --from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM publication p + --join result_organization ro on p.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS 
NonOpenAccess - FROM dataset d - join result_organization ro on d.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_dataset_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM dataset d + --join result_organization ro on d.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; -create table indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM software s - join result_organization ro on s.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_software_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from +-- (SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +--END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM software s +-- join result_organization ro on s.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM otherresearchproduct orp - join result_organization ro on orp.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; +--create table indi_other_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +-- from +-- (SELECT year, country, SUM(CASE +-- WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +-- END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM otherresearchproduct orp +-- join result_organization ro on orp.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; -create table indi_pub_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) 
no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join publication p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total; +--create table indi_pub_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join publication p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofpubs +--from total; -create table indi_dataset_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join dataset p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total; +--create table indi_dataset_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join dataset p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofdataset +--from total; -create table indi_software_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join software p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total; +--create table indi_software_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join software p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofsoftware +--from total; -create table indi_other_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join otherresearchproduct p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofother -from total; +--create table indi_other_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from 
otherresearchproduct_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join otherresearchproduct p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofother +--from total; -create table indi_other_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from otherresearchproduct_datasources pd -join datasource d on datasource=d.id -join otherresearchproduct p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total; +--create table indi_other_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from otherresearchproduct_datasources pd +--join datasource d on datasource=d.id +--join otherresearchproduct p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +--from total; -create table indi_software_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from software_datasources pd -join datasource d on datasource=d.id -join software p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total; +--create table indi_software_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from software_datasources pd +--join datasource d on datasource=d.id +--join software p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +--from total; -create table indi_dataset_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from dataset_datasources pd -join datasource d on datasource=d.id -join dataset p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total; +--create table indi_dataset_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from dataset_datasources pd +--join datasource d on datasource=d.id +--join dataset p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +--from total; -create table indi_pub_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from publication_datasources pd -join datasource d on datasource=d.id -join publication p on p.id=pd.id -where 
cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total; +--create table indi_pub_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from publication_datasources pd +--join datasource d on datasource=d.id +--join publication p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfPubs +--from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -231,11 +231,40 @@ join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp on p.id= tmp.id; +-- EOSC-TR1.1-02M: +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. +create table indi_pub_has_cc_licence_tr stored as parquet as +select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +-- #EOSC-F2-01M_cc Rich metadata for scholarly publications +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. 
+ +-- Same indicator as EOSC-TR1.1-02M (Najko's instructions) +-- create table indi_pub_has_cc_licence_f stored as parquet as +-- select +-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- on p.id= tmp.id + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp @@ -270,13 +299,53 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o -join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations -from tmp as o1 -join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +create table indi_pub_diamond stored as parquet as +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +from publication_datasources pd +left outer join ( +select pd.id, 1 as in_diamond_journal from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp +on pd.id=tmp.id + +create table indi_pub_hybrid stored as parquet as +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +from publication_datasources pd +left outer join ( +select pd.id, 1 as is_hybrid from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +on pd.id=tmp.id + +create table indi_is_gold_oa stored as parquet as +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +from publication_datasources pd +left outer join ( +select pd.id, 1 as gold_oa from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) +where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp +on pd.id=tmp.id) + + +create table indi_pub_in_transformative stored as parquet as +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +from publication pd +left outer join ( +select pd.id, 1 as is_transformative from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and ps.is_transformative_journal=true) tmp +on pd.id=tmp.id + +create table indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +left outer join +(select ri.id, 1 as 
pub_closed_other_open from result_instance ri +join publication p on p.id=ri.id +join datasource d on ri.hostedby=d.id +where d.type like '%Journal%' and ri.accessright='Closed Access' and +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id \ No newline at end of file From 09fc2afdcad627ec47cedfabf18e946d4036a0db Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 Nov 2021 16:13:10 +0200 Subject: [PATCH 36/44] Added indi_funder_country_collab Kept only indi_pub_has_cc_licence --- .../scripts/step16-createIndicatorsTables.sql | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 15f2f0996..926c8825f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -236,13 +236,13 @@ on p.id= tmp.id; -- de facto standard in scholarly communication and is promoted by many initiatives -- like Plan S. This indicator might be only useful when applied -- to openly available publications. -create table indi_pub_has_cc_licence_tr stored as parquet as -select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr -from publication p -left outer join (select p.id, license.type as lic from publication p -join publication_licenses as license on license.id = p.id -where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id +--create table indi_pub_has_cc_licence_tr stored as parquet as +--select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr +--from publication p +--left outer join (select p.id, license.type as lic from publication p +--join publication_licenses as license on license.id = p.id +--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +--on p.id= tmp.id -- #EOSC-F2-01M_cc Rich metadata for scholarly publications -- ## Indicator: has_cc_license. 
Creative Commons licensing has become a
@@ -299,6 +299,17 @@ join tmp as o2 on o1.result=o2.result
 where o1.id<>o2.id
 group by o1.id, o2.id, o1.type
 
+create table indi_funder_country_collab stored as parquet as
+with tmp as (select funder, project, country from organization_projects op
+join organization o on o.id=op.id
+join project p on p.id=op.project
+where country <> 'UNKNOWN')
+select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
+from tmp as f1
+join tmp as f2 on f1.project=f2.project
+where f1.country<>f2.country
+group by f1.funder, f2.country, f1.country
+
 create table indi_pub_diamond stored as parquet as
 select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
 from publication_datasources pd
From 5c6d32853713276f7808b3b3dcff046a447a1017 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Fri, 26 Nov 2021 15:38:16 +0100
Subject: [PATCH 37/44] code formatting
---
 .../oa/graph/clean/GraphCleaningFunctionsTest.java | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java
index 64fe7749b..c8a368dd6 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java
@@ -225,28 +225,26 @@ public class GraphCleaningFunctionsTest {
 GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
 }
 
-
-
 @Test
 public void testCleanDoiBoost() throws IOException {
- String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
+ String json = IOUtils
+ .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
 Publication p_in = MAPPER.readValue(json, Publication.class);
 Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
 Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
-
- Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
+ Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
 }
 
 @Test
 public void testCleanDoiBoost2() throws IOException {
- String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
+ String json = IOUtils
+ .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
 Publication p_in = MAPPER.readValue(json, Publication.class);
 Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
 Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
-
- Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
+ Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
 }
 }
From 014e872ae1d00a4cf32506e1ab2e204874913e25 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Fri, 26 Nov 2021 15:38:56 +0100
Subject: [PATCH 38/44] [resolution wf] added optional parameter to skip the
 entity resolution
---
 .../graph/resolution/oozie_app/workflow.xml   | 64 ++++++++++++++++++-
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 031b5a36f..3cd08bc9b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -12,6 +12,11 @@ targetPath the target path after resolution + + shouldResolveEntities + true + allows to activate/deactivate the resolution process over the entities + @@ -42,10 +47,18 @@ --workingPath${workingDir} --targetPath${targetPath} - + + + + ${wf:conf('shouldResolveEntities') eq false} + ${wf:conf('shouldResolveEntities') eq true} + + + + yarn @@ -73,10 +86,55 @@ + + + + + + + + + + ${nameNode}/${graphBasePath}/publication + ${nameNode}/${targetPath}/publication + + + + + + + + ${nameNode}/${graphBasePath}/dataset + ${nameNode}/${targetPath}/dataset + + + + + + + + ${nameNode}/${graphBasePath}/otherresearchproduct + ${nameNode}/${targetPath}/otherresearchproduct + + + + + + + + ${nameNode}/${graphBasePath}/software + ${nameNode}/${targetPath}/software + + + + + + + - + @@ -97,7 +155,7 @@ - + ${nameNode}/${graphBasePath}/datasource ${nameNode}/${targetPath}/datasource From 01e5e0142ac9d0e181e1d62b03c564bf4c15953d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 09:46:26 +0100 Subject: [PATCH 39/44] added test to verify the relation inverse lookup operation --- .../dhp/blacklist/BlacklistRelationTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java new file mode 100644 index 000000000..8b8c21001 --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.blacklist; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +public class BlacklistRelationTest { + + @Test + public void testRelationInverseLookup() { + + final List rels = Arrays.asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); + + rels.forEach(r -> { + RelationInverse inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + Assertions.assertNotNull(inverse.getRelClass()); + }); + + } + +} From 4fe788881796525044517c19d633ff1df017656f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 15:48:15 +0100 Subject: [PATCH 40/44] code formatting --- .../dhp/blacklist/BlacklistRelationTest.java | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java 
b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java index 8b8c21001..160658e5b 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -1,35 +1,38 @@ -package eu.dnetlib.dhp.blacklist; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.common.RelationInverse; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; +package eu.dnetlib.dhp.blacklist; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; + public class BlacklistRelationTest { - @Test - public void testRelationInverseLookup() { + @Test + public void testRelationInverseLookup() { - final List rels = Arrays.asList( - "resultResult_relationship_IsRelatedTo", - "resultOrganization_affiliation_isAuthorInstitutionOf", - "resultOrganization_affiliation_hasAuthorInstitution", - "datasourceOrganization_provision_isProvidedBy", - "projectOrganization_participation_hasParticipant", - "resultProject_outcome_produces", - "resultProject_outcome_isProducedBy"); + final List rels = Arrays + .asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); - rels.forEach(r -> { - RelationInverse inverse = ModelSupport.relationInverseMap.get(r); - Assertions.assertNotNull(inverse); - Assertions.assertNotNull(inverse.getRelType()); - Assertions.assertNotNull(inverse.getSubReltype()); - Assertions.assertNotNull(inverse.getRelClass()); - }); + rels.forEach(r -> { + RelationInverse inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + Assertions.assertNotNull(inverse.getRelClass()); + }); - } + } } From d85af6fc255acf12347b7424bb9d596d63abeefa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 1 Dec 2021 15:49:15 +0100 Subject: [PATCH 41/44] [cleaning wf] fixed OAF record navigation, a mapping defined on a container object would have prevented the natvigation to continue on its properties --- .../dhp/oa/graph/clean/OafCleaner.java | 5 +++++ .../clean/GraphCleaningFunctionsTest.java | 3 +++ .../dnetlib/dhp/oa/graph/clean/relation.json | 20 +++++++++---------- .../dnetlib/dhp/oa/graph/clean/synonyms.txt | 3 ++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 5502fd391..102a1fa85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -30,6 +30,11 @@ public class OafCleaner implements Serializable { } } else if (hasMapping(o, mapping)) { mapping.get(o.getClass()).accept(o); + for (final Field f : getAllFields(o.getClass())) { + f.setAccessible(true); + final Object 
val = f.get(o); + navigate(val, mapping); + } } else { for (final Field f : getAllFields(o.getClass())) { f.setAccessible(true); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index c8a368dd6..b69d0c08b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -68,6 +68,9 @@ public class GraphCleaningFunctionsTest { Relation r_out = OafCleaner.apply(r_in, mapping); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + + assertEquals("iis", r_out.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json index 97764de00..06eb9bae0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -1,10 +1,10 @@ -{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} -{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} 
-{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": 
"dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 79dc7cd2d..09bd58aeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1241,4 +1241,5 @@ dnet:relation_relClass @=@ Reviews @=@ reviews dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo -dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file +dnet:relation_subRelType @=@ relationship @=@ publicationDataset +dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned \ No newline at end of file From cfa456076908aca3a828bec81a34bf5a40788f2a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 2 Dec 2021 10:43:36 +0100 Subject: [PATCH 42/44] minor: fixed hive action name --- .../eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index 09930336a..ba5f4f375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -292,7 +292,7 @@ yarn cluster - Import table project + Import table relation eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob dhp-graph-mapper-${projectVersion}.jar From 3b19821f3c357f37f91467caad84a6f7cf874c34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 2 Dec 2021 10:44:10 +0100 Subject: [PATCH 43/44] added stats computation on the graph hive DB tables --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 46e0eb5e1..7eaec2e2c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -8,3 +8,12 @@ CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from 
${hiveDbName}.software s
 union all
 select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o;
+
+ANALYZE TABLE ${hiveDbName}.datasource COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.organization COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.project COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.publication COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.dataset COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.otherresearchproduct COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.software COMPUTE STATISTICS;
+ANALYZE TABLE ${hiveDbName}.relation COMPUTE STATISTICS;
\ No newline at end of file
From 87eedad89814d7310e3e07a802ca85b6bfcb13b1 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Thu, 2 Dec 2021 13:17:19 +0100
Subject: [PATCH 44/44] -
---
 dhp-workflows/dhp-aggregation/src/site/markdown/index.md       | 2 +-
 .../dhp-aggregation/src/site/markdown/integration.md           | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md
index 240617f91..6c4e05d5f 100644
--- a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md
@@ -16,5 +16,5 @@ It defines [mappings](mappings.md) for transformation of different datasource (S
 
 ### Integration of external information in the result
 
-The workflows create new entity in the OpenAIRE format (OAF) which aim is to enrich the result already contained in the graph.
+The workflows create new entities in the OpenAIRE format (OAF) whose aim is to enrich the results already contained in the graph.
 See the integration section for more insight.
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md
index baf232e40..7b763c681 100644
--- a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md
@@ -9,7 +9,7 @@ The information integrated so far is:
 1. Article impact measures
     1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores
 2. Result Subjects
-    1. Integration of Fields od Science and Techonology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in
+    1. Integration of Fields of Science and Technology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in
     results subjects.
@@ -18,7 +18,7 @@ and the element in the OAF model that should be used to map the information we w
 
 The id is set by using a particular encoding of the given PID
 
-*unresolved:[pid]:[pidtype]*
+*unresolved::[pid]::[pidtype]*
 
 where
 
@@ -31,6 +31,6 @@ Such entities are matched against those available in the graph using the result.
 This mechanism can be used to integrate enrichments produced and associated to a given PID. If a match is found
 with one of the results already in the graph, that result will be enriched with the
 information present in the new OAF.
-All the objects for which a match is not found are discarded.
+All the entities for which a match is not found are discarded.
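+
+As a purely illustrative example (the DOI below is made up and does not refer to any real record in the graph): a
+[Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) score computed for the DOI 10.1000/182 would be carried
+by an unresolved entity with identifier
+
+*unresolved::10.1000/182::doi*
+
+If the graph contains a result with that DOI, the measures carried by the unresolved entity are merged into it;
+otherwise the unresolved entity is discarded as described above.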